150 files changed, 3580 insertions, 2667 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 9325e189a215..04a78d9f8fe3 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -41,6 +41,7 @@ config INFINIBAND_USER_MEM bool depends on INFINIBAND_USER_ACCESS != n depends on MMU + select DMA_SHARED_BUFFER default y config INFINIBAND_ON_DEMAND_PAGING diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index ccf2670ef45e..8ab4eea5a0a5 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -40,5 +40,5 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ uverbs_std_types_srq.o \ uverbs_std_types_wq.o \ uverbs_std_types_qp.o -ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o +ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o umem_dmabuf.o ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 7989b7e1d1c0..5c9fac7cf420 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -669,11 +669,10 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, * rdma_find_gid_by_port - Returns the GID entry attributes when it finds * a valid GID entry for given search parameters. It searches for the specified * GID value in the local software cache. - * @device: The device to query. + * @ib_dev: The device to query. * @gid: The GID value to search for. * @gid_type: The GID type to search for. - * @port_num: The port number of the device where the GID value should be - * searched. + * @port: The port number of the device where the GID value should be searched. * @ndev: In RoCE, the net device of the device. NULL means ignore. * * Returns sgid attributes if the GID is found with valid reference or @@ -719,7 +718,7 @@ EXPORT_SYMBOL(rdma_find_gid_by_port); /** * rdma_find_gid_by_filter - Returns the GID table attribute where a * specified GID value occurs - * @device: The device to query. + * @ib_dev: The device to query. * @gid: The GID value to search for. * @port: The port number of the device where the GID value could be * searched. @@ -728,6 +727,7 @@ EXPORT_SYMBOL(rdma_find_gid_by_port); * otherwise, we continue searching the GID table. It's guaranteed that * while filter is executed, ndev field is valid and the structure won't * change. filter is executed in an atomic context. filter must not be NULL. + * @context: Private data to pass into the call-back. * * rdma_find_gid_by_filter() searches for the specified GID value * of which the filter function returns true in the port's GID table. @@ -1253,7 +1253,6 @@ EXPORT_SYMBOL(rdma_get_gid_attr); * @entries: Entries where GID entries are returned. * @max_entries: Maximum number of entries that can be returned. * Entries array must be allocated to hold max_entries number of entries. - * @num_entries: Updated to the number of entries that were successfully read. * * Returns number of entries on success or appropriate error code. 
*/ diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 98165589c8ab..be996dba040c 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -4333,7 +4333,7 @@ static int cm_add_one(struct ib_device *ib_device) unsigned long flags; int ret; int count = 0; - u8 i; + unsigned int i; cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt), GFP_KERNEL); @@ -4345,7 +4345,7 @@ static int cm_add_one(struct ib_device *ib_device) cm_dev->going_down = 0; set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); - for (i = 1; i <= ib_device->phys_port_cnt; i++) { + rdma_for_each_port (ib_device, i) { if (!rdma_cap_ib_cm(ib_device, i)) continue; @@ -4431,7 +4431,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) .clr_port_cap_mask = IB_PORT_CM_SUP }; unsigned long flags; - int i; + unsigned int i; write_lock_irqsave(&cm.device_lock, flags); list_del(&cm_dev->list); @@ -4441,7 +4441,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) cm_dev->going_down = 1; spin_unlock_irq(&cm.lock); - for (i = 1; i <= ib_device->phys_port_cnt; i++) { + rdma_for_each_port (ib_device, i) { if (!rdma_cap_ib_cm(ib_device, i)) continue; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index c51b84b2d2f3..94096511599f 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -352,7 +352,13 @@ struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev) struct cma_multicast { struct rdma_id_private *id_priv; - struct ib_sa_multicast *sa_mc; + union { + struct ib_sa_multicast *sa_mc; + struct { + struct work_struct work; + struct rdma_cm_event event; + } iboe_join; + }; struct list_head list; void *context; struct sockaddr_storage addr; @@ -1823,6 +1829,8 @@ static void destroy_mc(struct rdma_id_private *id_priv, cma_igmp_send(ndev, &mgid, false); dev_put(ndev); } + + cancel_work_sync(&mc->iboe_join.work); } kfree(mc); } @@ -2683,6 +2691,28 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, return (id_priv->query_id < 0) ? 
id_priv->query_id : 0; } +static void cma_iboe_join_work_handler(struct work_struct *work) +{ + struct cma_multicast *mc = + container_of(work, struct cma_multicast, iboe_join.work); + struct rdma_cm_event *event = &mc->iboe_join.event; + struct rdma_id_private *id_priv = mc->id_priv; + int ret; + + mutex_lock(&id_priv->handler_mutex); + if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING || + READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL) + goto out_unlock; + + ret = cma_cm_event_handler(id_priv, event); + WARN_ON(ret); + +out_unlock: + mutex_unlock(&id_priv->handler_mutex); + if (event->event == RDMA_CM_EVENT_MULTICAST_JOIN) + rdma_destroy_ah_attr(&event->param.ud.ah_attr); +} + static void cma_work_handler(struct work_struct *_work) { struct cma_work *work = container_of(_work, struct cma_work, work); @@ -4478,10 +4508,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) cma_make_mc_event(status, id_priv, multicast, &event, mc); ret = cma_cm_event_handler(id_priv, &event); rdma_destroy_ah_attr(&event.param.ud.ah_attr); - if (ret) { - destroy_id_handler_unlock(id_priv); - return 0; - } + WARN_ON(ret); out: mutex_unlock(&id_priv->handler_mutex); @@ -4542,17 +4569,6 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); rec.join_state = mc->join_state; - if ((rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) && - (!ib_sa_sendonly_fullmem_support(&sa_client, - id_priv->id.device, - id_priv->id.port_num))) { - dev_warn( - &id_priv->id.device->dev, - "RDMA CM: port %u Unable to multicast join: SM doesn't support Send Only Full Member option\n", - id_priv->id.port_num); - return -EOPNOTSUPP; - } - comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL | @@ -4604,7 +4620,6 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid, static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, struct cma_multicast *mc) { - struct cma_work *work; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; int err = 0; struct sockaddr *addr = (struct sockaddr *)&mc->addr; @@ -4618,10 +4633,6 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, if (cma_zero_addr(addr)) return -EINVAL; - work = kzalloc(sizeof *work, GFP_KERNEL); - if (!work) - return -ENOMEM; - gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num - rdma_start_port(id_priv->cma_dev->device)]; cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type); @@ -4632,10 +4643,9 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, if (dev_addr->bound_dev_if) ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); - if (!ndev) { - err = -ENODEV; - goto err_free; - } + if (!ndev) + return -ENODEV; + ib.rec.rate = iboe_get_rate(ndev); ib.rec.hop_limit = 1; ib.rec.mtu = iboe_get_mtu(ndev->mtu); @@ -4653,24 +4663,15 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, err = -ENOTSUPP; } dev_put(ndev); - if (err || !ib.rec.mtu) { - if (!err) - err = -EINVAL; - goto err_free; - } + if (err || !ib.rec.mtu) + return err ?: -EINVAL; + rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, &ib.rec.port_gid); - work->id = id_priv; - INIT_WORK(&work->work, cma_work_handler); - cma_make_mc_event(0, id_priv, &ib, &work->event, mc); - /* Balances with cma_id_put() in cma_work_handler */ - cma_id_get(id_priv); - queue_work(cma_wq, 
&work->work); + INIT_WORK(&mc->iboe_join.work, cma_iboe_join_work_handler); + cma_make_mc_event(0, id_priv, &ib, &mc->iboe_join.event, mc); + queue_work(cma_wq, &mc->iboe_join.work); return 0; - -err_free: - kfree(work); - return err; } int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 97a77ea8d3c9..e0d5e3bae458 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -204,7 +204,6 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group, unsigned int i; unsigned int ports_num; struct cma_dev_port_group *ports; - int err; ibdev = cma_get_ib_dev(cma_dev); @@ -215,10 +214,8 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group, ports = kcalloc(ports_num, sizeof(*cma_dev_group->ports), GFP_KERNEL); - if (!ports) { - err = -ENOMEM; - goto free; - } + if (!ports) + return -ENOMEM; for (i = 0; i < ports_num; i++) { char port_str[10]; @@ -234,12 +231,7 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group, } cma_dev_group->ports = ports; - return 0; -free: - kfree(ports); - cma_dev_group->ports = NULL; - return err; } static void release_cma_dev(struct config_item *item) diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 92745522250e..f3a7c1f404af 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -10,30 +10,35 @@ #define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID) -static int __counter_set_mode(struct rdma_counter_mode *curr, +static int __counter_set_mode(struct rdma_port_counter *port_counter, enum rdma_nl_counter_mode new_mode, enum rdma_nl_counter_mask new_mask) { - if ((new_mode == RDMA_COUNTER_MODE_AUTO) && - ((new_mask & (~ALL_AUTO_MODE_MASKS)) || - (curr->mode != RDMA_COUNTER_MODE_NONE))) - return -EINVAL; + if (new_mode == RDMA_COUNTER_MODE_AUTO && port_counter->num_counters) + if (new_mask & ~ALL_AUTO_MODE_MASKS || + port_counter->mode.mode != RDMA_COUNTER_MODE_NONE) + return -EINVAL; - curr->mode = new_mode; - curr->mask = new_mask; + port_counter->mode.mode = new_mode; + port_counter->mode.mask = new_mask; return 0; } -/** +/* * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode * - * When @on is true, the @mask must be set; When @on is false, it goes - * into manual mode if there's any counter, so that the user is able to - * manually access them. + * @dev: Device to operate + * @port: Port to use + * @mask: Mask to configure + * @extack: Message to the user + * + * Return 0 on success. 
*/ int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port, - bool on, enum rdma_nl_counter_mask mask) + enum rdma_nl_counter_mask mask, + struct netlink_ext_ack *extack) { + enum rdma_nl_counter_mode mode = RDMA_COUNTER_MODE_AUTO; struct rdma_port_counter *port_counter; int ret; @@ -42,23 +47,23 @@ int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port, return -EOPNOTSUPP; mutex_lock(&port_counter->lock); - if (on) { - ret = __counter_set_mode(&port_counter->mode, - RDMA_COUNTER_MODE_AUTO, mask); - } else { - if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) { - ret = -EINVAL; - goto out; - } + if (mask) { + ret = __counter_set_mode(port_counter, mode, mask); + if (ret) + NL_SET_ERR_MSG( + extack, + "Turning on auto mode is not allowed when there is bound QP"); + goto out; + } - if (port_counter->num_counters) - ret = __counter_set_mode(&port_counter->mode, - RDMA_COUNTER_MODE_MANUAL, 0); - else - ret = __counter_set_mode(&port_counter->mode, - RDMA_COUNTER_MODE_NONE, 0); + if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) { + ret = -EINVAL; + goto out; } + mode = (port_counter->num_counters) ? RDMA_COUNTER_MODE_MANUAL : + RDMA_COUNTER_MODE_NONE; + ret = __counter_set_mode(port_counter, mode, 0); out: mutex_unlock(&port_counter->lock); return ret; @@ -122,8 +127,8 @@ static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u8 port, mutex_lock(&port_counter->lock); switch (mode) { case RDMA_COUNTER_MODE_MANUAL: - ret = __counter_set_mode(&port_counter->mode, - RDMA_COUNTER_MODE_MANUAL, 0); + ret = __counter_set_mode(port_counter, RDMA_COUNTER_MODE_MANUAL, + 0); if (ret) { mutex_unlock(&port_counter->lock); goto err_mode; @@ -170,8 +175,7 @@ static void rdma_counter_free(struct rdma_counter *counter) port_counter->num_counters--; if (!port_counter->num_counters && (port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL)) - __counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_NONE, - 0); + __counter_set_mode(port_counter, RDMA_COUNTER_MODE_NONE, 0); mutex_unlock(&port_counter->lock); @@ -227,7 +231,7 @@ static void counter_history_stat_update(struct rdma_counter *counter) port_counter->hstats->value[i] += counter->stats->value[i]; } -/** +/* * rdma_get_counter_auto_mode - Find the counter that @qp should be bound * with in auto mode * @@ -274,7 +278,7 @@ static void counter_release(struct kref *kref) rdma_counter_free(counter); } -/** +/* * rdma_counter_bind_qp_auto - Check and bind the QP to a counter base on * the auto-mode rule */ @@ -311,7 +315,7 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) return 0; } -/** +/* * rdma_counter_unbind_qp - Unbind a qp from a counter * @force: * true - Decrease the counter ref-count anyway (e.g., qp destroy) @@ -380,7 +384,7 @@ next: return sum; } -/** +/* * rdma_counter_get_hwstat_value() - Get the sum value of all counters on a * specific port, including the running ones and history data */ @@ -436,7 +440,7 @@ static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, return counter; } -/** +/* * rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id */ int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, @@ -485,7 +489,7 @@ err: return ret; } -/** +/* * rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it * The id of new counter is returned in @counter_id */ @@ -533,7 +537,7 @@ err: return ret; } -/** +/* * rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter */ int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port, diff --git 
a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index e96f979e6d52..aac0fe14e1d9 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -848,6 +848,20 @@ static int setup_port_data(struct ib_device *device) return 0; } +/** + * ib_port_immutable_read() - Read rdma port's immutable data + * @dev: IB device + * @port: port number whose immutable data to read. It starts with index 1 and + * valid upto including rdma_end_port(). + */ +const struct ib_port_immutable* +ib_port_immutable_read(struct ib_device *dev, unsigned int port) +{ + WARN_ON(!rdma_is_port_valid(dev, port)); + return &dev->port_data[port].immutable; +} +EXPORT_SYMBOL(ib_port_immutable_read); + void ib_get_device_fw_str(struct ib_device *dev, char *str) { if (dev->ops.get_dev_fw_str) @@ -1887,9 +1901,9 @@ static int __ib_get_client_nl_info(struct ib_device *ibdev, /** * ib_get_client_nl_info - Fetch the nl_info from a client - * @device - IB device - * @client_name - Name of the client - * @res - Result of the query + * @ibdev: IB device + * @client_name: Name of the client + * @res: Result of the query */ int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name, struct ib_client_nl_info *res) @@ -2317,7 +2331,7 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter, up_read(&devices_rwsem); } -/** +/* * ib_enum_all_devs - enumerate all ib_devices * @cb: Callback to call for each found ib_device * @@ -2681,6 +2695,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, read_counters); SET_DEVICE_OP(dev_ops, reg_dm_mr); SET_DEVICE_OP(dev_ops, reg_user_mr); + SET_DEVICE_OP(dev_ops, reg_user_mr_dmabuf); SET_DEVICE_OP(dev_ops, req_ncomp_notif); SET_DEVICE_OP(dev_ops, req_notify_cq); SET_DEVICE_OP(dev_ops, rereg_user_mr); diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index 46686990a827..30a0ff76b332 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -392,7 +392,7 @@ static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = { /** * iwpm_register_pid_cb - Process the port mapper response to * iwpm_register_pid query - * @skb: + * @skb: The socket buffer * @cb: Contains the received message (payload and netlink header) * * If successful, the function receives the userspace port mapper pid @@ -468,7 +468,7 @@ static const struct nla_policy resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = { /** * iwpm_add_mapping_cb - Process the port mapper response to * iwpm_add_mapping request - * @skb: + * @skb: The socket buffer * @cb: Contains the received message (payload and netlink header) */ int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb) @@ -545,7 +545,7 @@ static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = /** * iwpm_add_and_query_mapping_cb - Process the port mapper response to * iwpm_add_and_query_mapping request - * @skb: + * @skb: The socket buffer * @cb: Contains the received message (payload and netlink header) */ int iwpm_add_and_query_mapping_cb(struct sk_buff *skb, @@ -627,7 +627,7 @@ query_mapping_response_exit: /** * iwpm_remote_info_cb - Process remote connecting peer address info, which * the port mapper has received from the connecting peer - * @skb: + * @skb: The socket buffer * @cb: Contains the received message (payload and netlink header) * * Stores the IPv4/IPv6 address info in a hash table @@ -706,7 +706,7 @@ static const struct nla_policy 
resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = { /** * iwpm_mapping_info_cb - Process a notification that the userspace * port mapper daemon is started - * @skb: + * @skb: The socket buffer * @cb: Contains the received message (payload and netlink header) * * Using the received port mapper pid, send all the local mapping @@ -766,7 +766,7 @@ static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = { /** * iwpm_ack_mapping_info_cb - Process the port mapper ack for * the provided local mapping info records - * @skb: + * @skb: The socket buffer * @cb: Contains the received message (payload and netlink header) */ int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb) @@ -796,7 +796,7 @@ static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = { /** * iwpm_mapping_error_cb - Process port mapper notification for error * - * @skb: + * @skb: The socket buffer * @cb: Contains the received message (payload and netlink header) */ int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb) @@ -841,7 +841,7 @@ static const struct nla_policy hello_policy[IWPM_NLA_HELLO_MAX] = { /** * iwpm_hello_cb - Process a hello message from iwpmd * - * @skb: + * @skb: The socket buffer * @cb: Contains the received message (payload and netlink header) * * Using the received port mapper pid, send the kernel's abi_version diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index 13495b43dbc1..f80e5550b51f 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -127,8 +127,8 @@ static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage *, /** * iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address * info in a hash table - * @local_addr: Local ip/tcp address - * @mapped_addr: Mapped local ip/tcp address + * @local_sockaddr: Local ip/tcp address + * @mapped_sockaddr: Mapped local ip/tcp address * @nl_client: The index of the netlink client * @map_flags: IWPM mapping flags */ @@ -174,7 +174,7 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, /** * iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address * info from the hash table - * @local_addr: Local ip/tcp address + * @local_sockaddr: Local ip/tcp address * @mapped_local_addr: Mapped local ip/tcp address * * Returns err code if mapping info is not found in the hash table, diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 740f03ecc05d..57519ca6cd2c 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -721,6 +721,7 @@ EXPORT_SYMBOL(ib_sa_get_mcmember_rec); * member record and gid of the device. * @device: RDMA device * @port_num: Port of the rdma device to consider + * @rec: Multicast member record to use * @ndev: Optional netdevice, applicable only for RoCE * @gid_type: GID type to consider * @ah_attr: AH attribute to fillup on successful completion diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 08366e254b1d..d306049c22a2 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1768,9 +1768,7 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]) mask = nla_get_u32( tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]); - - ret = rdma_counter_set_auto_mode(device, port, - mask ? 
true : false, mask); + ret = rdma_counter_set_auto_mode(device, port, mask, extack); if (ret) goto err_msg; } else { diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index ff1551b3cf61..ffabaf327242 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -201,8 +201,8 @@ EXPORT_SYMBOL(rdma_restrack_parent_name); /** * rdma_restrack_new() - Initializes new restrack entry to allow _put() interface * to release memory in fully automatic way. - * @res - Entry to initialize - * @type - REstrack type + * @res: Entry to initialize + * @type: REstrack type */ void rdma_restrack_new(struct rdma_restrack_entry *res, enum rdma_restrack_type type) diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index 6b8364bb032d..34fff94eaa38 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -505,7 +505,7 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev, * rdma_roce_rescan_device - Rescan all of the network devices in the system * and add their gids, as needed, to the relevant RoCE devices. * - * @device: the rdma device + * @ib_dev: the rdma device */ void rdma_roce_rescan_device(struct ib_device *ib_dev) { diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index a96030b784eb..31156e22d3e7 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -410,7 +410,7 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, ctx->type = RDMA_RW_SIG_MR; ctx->nr_ops = 1; - ctx->reg = kcalloc(1, sizeof(*ctx->reg), GFP_KERNEL); + ctx->reg = kzalloc(sizeof(*ctx->reg), GFP_KERNEL); if (!ctx->reg) { ret = -ENOMEM; goto out_unmap_prot_sg; diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 89a831fa1885..9ef1a355131b 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1434,7 +1434,7 @@ enum opa_pr_supported { PR_IB_SUPPORTED }; -/** +/* * opa_pr_query_possible - Check if current PR query can be an OPA query. * * Retuns PR_NOT_SUPPORTED if a path record query is not @@ -1951,30 +1951,6 @@ err1: } EXPORT_SYMBOL(ib_sa_guid_info_rec_query); -bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client, - struct ib_device *device, - u8 port_num) -{ - struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); - struct ib_sa_port *port; - bool ret = false; - unsigned long flags; - - if (!sa_dev) - return ret; - - port = &sa_dev->port[port_num - sa_dev->start_port]; - - spin_lock_irqsave(&port->classport_lock, flags); - if ((port->classport_info.valid) && - (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_IB)) - ret = ib_get_cpi_capmask2(&port->classport_info.data.ib) - & IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT; - spin_unlock_irqrestore(&port->classport_lock, flags); - return ret; -} -EXPORT_SYMBOL(ib_sa_sendonly_fullmem_support); - struct ib_classport_info_context { struct completion done; struct ib_sa_query *sa_query; diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 917338db7ac1..2dde99a9ba07 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -2,6 +2,7 @@ * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2020 Intel Corporation. All rights reserved. 
* * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -278,6 +279,8 @@ void ib_umem_release(struct ib_umem *umem) { if (!umem) return; + if (umem->is_dmabuf) + return ib_umem_dmabuf_release(to_ib_umem_dmabuf(umem)); if (umem->is_odp) return ib_umem_odp_release(to_ib_umem_odp(umem)); diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c new file mode 100644 index 000000000000..f9b5162d9260 --- /dev/null +++ b/drivers/infiniband/core/umem_dmabuf.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +/* + * Copyright (c) 2020 Intel Corporation. All rights reserved. + */ + +#include <linux/dma-buf.h> +#include <linux/dma-resv.h> +#include <linux/dma-mapping.h> + +#include "uverbs.h" + +int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) +{ + struct sg_table *sgt; + struct scatterlist *sg; + struct dma_fence *fence; + unsigned long start, end, cur = 0; + unsigned int nmap = 0; + int i; + + dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); + + if (umem_dmabuf->sgt) + goto wait_fence; + + sgt = dma_buf_map_attachment(umem_dmabuf->attach, DMA_BIDIRECTIONAL); + if (IS_ERR(sgt)) + return PTR_ERR(sgt); + + /* modify the sg list in-place to match umem address and length */ + + start = ALIGN_DOWN(umem_dmabuf->umem.address, PAGE_SIZE); + end = ALIGN(umem_dmabuf->umem.address + umem_dmabuf->umem.length, + PAGE_SIZE); + for_each_sgtable_dma_sg(sgt, sg, i) { + if (start < cur + sg_dma_len(sg) && cur < end) + nmap++; + if (cur <= start && start < cur + sg_dma_len(sg)) { + unsigned long offset = start - cur; + + umem_dmabuf->first_sg = sg; + umem_dmabuf->first_sg_offset = offset; + sg_dma_address(sg) += offset; + sg_dma_len(sg) -= offset; + cur += offset; + } + if (cur < end && end <= cur + sg_dma_len(sg)) { + unsigned long trim = cur + sg_dma_len(sg) - end; + + umem_dmabuf->last_sg = sg; + umem_dmabuf->last_sg_trim = trim; + sg_dma_len(sg) -= trim; + break; + } + cur += sg_dma_len(sg); + } + + umem_dmabuf->umem.sg_head.sgl = umem_dmabuf->first_sg; + umem_dmabuf->umem.sg_head.nents = nmap; + umem_dmabuf->umem.nmap = nmap; + umem_dmabuf->sgt = sgt; + +wait_fence: + /* + * Although the sg list is valid now, the content of the pages + * may be not up-to-date. Wait for the exporter to finish + * the migration. 
+ */ + fence = dma_resv_get_excl(umem_dmabuf->attach->dmabuf->resv); + if (fence) + return dma_fence_wait(fence, false); + + return 0; +} +EXPORT_SYMBOL(ib_umem_dmabuf_map_pages); + +void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf) +{ + dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); + + if (!umem_dmabuf->sgt) + return; + + /* retore the original sg list */ + if (umem_dmabuf->first_sg) { + sg_dma_address(umem_dmabuf->first_sg) -= + umem_dmabuf->first_sg_offset; + sg_dma_len(umem_dmabuf->first_sg) += + umem_dmabuf->first_sg_offset; + umem_dmabuf->first_sg = NULL; + umem_dmabuf->first_sg_offset = 0; + } + if (umem_dmabuf->last_sg) { + sg_dma_len(umem_dmabuf->last_sg) += + umem_dmabuf->last_sg_trim; + umem_dmabuf->last_sg = NULL; + umem_dmabuf->last_sg_trim = 0; + } + + dma_buf_unmap_attachment(umem_dmabuf->attach, umem_dmabuf->sgt, + DMA_BIDIRECTIONAL); + + umem_dmabuf->sgt = NULL; +} +EXPORT_SYMBOL(ib_umem_dmabuf_unmap_pages); + +struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device, + unsigned long offset, size_t size, + int fd, int access, + const struct dma_buf_attach_ops *ops) +{ + struct dma_buf *dmabuf; + struct ib_umem_dmabuf *umem_dmabuf; + struct ib_umem *umem; + unsigned long end; + struct ib_umem_dmabuf *ret = ERR_PTR(-EINVAL); + + if (check_add_overflow(offset, (unsigned long)size, &end)) + return ret; + + if (unlikely(!ops || !ops->move_notify)) + return ret; + + dmabuf = dma_buf_get(fd); + if (IS_ERR(dmabuf)) + return ERR_CAST(dmabuf); + + if (dmabuf->size < end) + goto out_release_dmabuf; + + umem_dmabuf = kzalloc(sizeof(*umem_dmabuf), GFP_KERNEL); + if (!umem_dmabuf) { + ret = ERR_PTR(-ENOMEM); + goto out_release_dmabuf; + } + + umem = &umem_dmabuf->umem; + umem->ibdev = device; + umem->length = size; + umem->address = offset; + umem->writable = ib_access_writable(access); + umem->is_dmabuf = 1; + + if (!ib_umem_num_pages(umem)) + goto out_free_umem; + + umem_dmabuf->attach = dma_buf_dynamic_attach( + dmabuf, + device->dma_device, + ops, + umem_dmabuf); + if (IS_ERR(umem_dmabuf->attach)) { + ret = ERR_CAST(umem_dmabuf->attach); + goto out_free_umem; + } + return umem_dmabuf; + +out_free_umem: + kfree(umem_dmabuf); + +out_release_dmabuf: + dma_buf_put(dmabuf); + return ret; +} +EXPORT_SYMBOL(ib_umem_dmabuf_get); + +void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf) +{ + struct dma_buf *dmabuf = umem_dmabuf->attach->dmabuf; + + dma_buf_detach(dmabuf, umem_dmabuf->attach); + dma_buf_put(dmabuf); + kfree(umem_dmabuf); +} diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 19104a675691..dd7f3b437c6b 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -379,6 +379,11 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, mutex_lock(&file->mutex); + if (file->agents_dead) { + mutex_unlock(&file->mutex); + return -EIO; + } + while (list_empty(&file->recv_list)) { mutex_unlock(&file->mutex); @@ -392,6 +397,11 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, mutex_lock(&file->mutex); } + if (file->agents_dead) { + mutex_unlock(&file->mutex); + return -EIO; + } + packet = list_entry(file->recv_list.next, struct ib_umad_packet, list); list_del(&packet->list); @@ -524,7 +534,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, agent = __get_agent(file, packet->mad.hdr.id); if (!agent) { - ret = -EINVAL; + ret = -EIO; goto err_up; } @@ -653,10 +663,14 @@ static __poll_t ib_umad_poll(struct file *filp, 
struct poll_table_struct *wait) /* we will always be able to post a MAD send */ __poll_t mask = EPOLLOUT | EPOLLWRNORM; + mutex_lock(&file->mutex); poll_wait(filp, &file->recv_wait, wait); if (!list_empty(&file->recv_list)) mask |= EPOLLIN | EPOLLRDNORM; + if (file->agents_dead) + mask = EPOLLERR; + mutex_unlock(&file->mutex); return mask; } @@ -1336,6 +1350,7 @@ static void ib_umad_kill_port(struct ib_umad_port *port) list_for_each_entry(file, &port->file_list, port_list) { mutex_lock(&file->mutex); file->agents_dead = 1; + wake_up_interruptible(&file->recv_wait); mutex_unlock(&file->mutex); for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 98a5d36813ff..f5b8be3bedde 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1382,7 +1382,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs, if (has_sq) scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->send_cq_handle, attrs); - if (!ind_tbl) + if (!ind_tbl && cmd->qp_type != IB_QPT_XRC_INI) rcq = rcq ?: scq; pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, attrs); diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index dd4e76b26c74..f782d5e1aa25 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2020, Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -182,6 +183,86 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_MR)( return IS_UVERBS_COPY_ERR(ret) ? 
ret : 0; } +static int UVERBS_HANDLER(UVERBS_METHOD_REG_DMABUF_MR)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = + uverbs_attr_get_uobject(attrs, UVERBS_ATTR_REG_DMABUF_MR_HANDLE); + struct ib_pd *pd = + uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DMABUF_MR_PD_HANDLE); + struct ib_device *ib_dev = pd->device; + + u64 offset, length, iova; + u32 fd, access_flags; + struct ib_mr *mr; + int ret; + + if (!ib_dev->ops.reg_user_mr_dmabuf) + return -EOPNOTSUPP; + + ret = uverbs_copy_from(&offset, attrs, + UVERBS_ATTR_REG_DMABUF_MR_OFFSET); + if (ret) + return ret; + + ret = uverbs_copy_from(&length, attrs, + UVERBS_ATTR_REG_DMABUF_MR_LENGTH); + if (ret) + return ret; + + ret = uverbs_copy_from(&iova, attrs, + UVERBS_ATTR_REG_DMABUF_MR_IOVA); + if (ret) + return ret; + + if ((offset & ~PAGE_MASK) != (iova & ~PAGE_MASK)) + return -EINVAL; + + ret = uverbs_copy_from(&fd, attrs, + UVERBS_ATTR_REG_DMABUF_MR_FD); + if (ret) + return ret; + + ret = uverbs_get_flags32(&access_flags, attrs, + UVERBS_ATTR_REG_DMABUF_MR_ACCESS_FLAGS, + IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_ATOMIC | + IB_ACCESS_RELAXED_ORDERING); + if (ret) + return ret; + + ret = ib_check_mr_access(ib_dev, access_flags); + if (ret) + return ret; + + mr = pd->device->ops.reg_user_mr_dmabuf(pd, offset, length, iova, fd, + access_flags, + &attrs->driver_udata); + if (IS_ERR(mr)) + return PTR_ERR(mr); + + mr->device = pd->device; + mr->pd = pd; + mr->type = IB_MR_TYPE_USER; + mr->uobject = uobj; + atomic_inc(&pd->usecnt); + + uobj->object = mr; + + uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_REG_DMABUF_MR_HANDLE); + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DMABUF_MR_RESP_LKEY, + &mr->lkey, sizeof(mr->lkey)); + if (ret) + return ret; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DMABUF_MR_RESP_RKEY, + &mr->rkey, sizeof(mr->rkey)); + return ret; +} + DECLARE_UVERBS_NAMED_METHOD( UVERBS_METHOD_ADVISE_MR, UVERBS_ATTR_IDR(UVERBS_ATTR_ADVISE_MR_PD_HANDLE, @@ -247,6 +328,37 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_TYPE(u32), UA_MANDATORY)); +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_REG_DMABUF_MR, + UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DMABUF_MR_HANDLE, + UVERBS_OBJECT_MR, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DMABUF_MR_PD_HANDLE, + UVERBS_OBJECT_PD, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_OFFSET, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_LENGTH, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_IOVA, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_FD, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_REG_DMABUF_MR_ACCESS_FLAGS, + enum ib_access_flags), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DMABUF_MR_RESP_LKEY, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DMABUF_MR_RESP_RKEY, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + DECLARE_UVERBS_NAMED_METHOD_DESTROY( UVERBS_METHOD_MR_DESTROY, UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MR_HANDLE, @@ -257,10 +369,11 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY( DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_MR, UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr), + &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR), &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG), &UVERBS_METHOD(UVERBS_METHOD_MR_DESTROY), - &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR), - &UVERBS_METHOD(UVERBS_METHOD_QUERY_MR)); + 
&UVERBS_METHOD(UVERBS_METHOD_QUERY_MR), + &UVERBS_METHOD(UVERBS_METHOD_REG_DMABUF_MR)); const struct uapi_definition uverbs_def_obj_mr[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MR, diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 9137a25bb521..28464c58738c 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2248,7 +2248,7 @@ static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid) struct ib_qp_init_attr init_attr = {}; struct ib_qp_attr attr = {}; int num_eth_ports = 0; - int port; + unsigned int port; /* If QP state >= init, it is assigned to a port and we can check this * port only. @@ -2263,7 +2263,7 @@ static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid) } /* Can't get a quick answer, iterate over all ports */ - for (port = 0; port < qp->device->phys_port_cnt; port++) + rdma_for_each_port(qp->device, port) if (rdma_port_get_link_layer(qp->device, port) != IB_LINK_LAYER_INFINIBAND) num_eth_ports++; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 401bdc9e931e..ba515efd4fdc 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -469,7 +469,6 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd) struct bnxt_re_mr *mr = NULL; dma_addr_t dma_addr = 0; struct ib_mw *mw; - u64 pbl_tbl; int rc; dma_addr = dma_map_single(dev, fence->va, BNXT_RE_FENCE_BYTES, @@ -504,9 +503,8 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd) mr->ib_mr.lkey = mr->qplib_mr.lkey; mr->qplib_mr.va = (u64)(unsigned long)fence->va; mr->qplib_mr.total_size = BNXT_RE_FENCE_BYTES; - pbl_tbl = dma_addr; - rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl_tbl, - BNXT_RE_FENCE_PBL_SIZE, false, PAGE_SIZE); + rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, NULL, + BNXT_RE_FENCE_PBL_SIZE, PAGE_SIZE); if (rc) { ibdev_err(&rdev->ibdev, "Failed to register fence-MR\n"); goto fail; @@ -3589,7 +3587,6 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags) struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_re_mr *mr; - u64 pbl = 0; int rc; mr = kzalloc(sizeof(*mr), GFP_KERNEL); @@ -3608,7 +3605,7 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags) mr->qplib_mr.hwq.level = PBL_LVL_MAX; mr->qplib_mr.total_size = -1; /* Infinte length */ - rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl, 0, false, + rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, NULL, 0, PAGE_SIZE); if (rc) goto fail_mr; @@ -3779,19 +3776,6 @@ int bnxt_re_dealloc_mw(struct ib_mw *ib_mw) return rc; } -static int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig, - int page_shift) -{ - u64 *pbl_tbl = pbl_tbl_orig; - u64 page_size = BIT_ULL(page_shift); - struct ib_block_iter biter; - - rdma_umem_for_each_dma_block(umem, &biter, page_size) - *pbl_tbl++ = rdma_block_iter_dma_address(&biter); - - return pbl_tbl - pbl_tbl_orig; -} - /* uverbs */ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, @@ -3801,7 +3785,6 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_re_mr *mr; struct ib_umem *umem; - u64 *pbl_tbl = NULL; unsigned long page_size; int umem_pgs, rc; @@ -3846,39 +3829,19 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, } 
mr->qplib_mr.total_size = length; - if (page_size == BNXT_RE_PAGE_SIZE_4K && - length > BNXT_RE_MAX_MR_SIZE_LOW) { - ibdev_err(&rdev->ibdev, "Requested MR Sz:%llu Max sup:%llu", - length, (u64)BNXT_RE_MAX_MR_SIZE_LOW); - rc = -EINVAL; - goto free_umem; - } - umem_pgs = ib_umem_num_dma_blocks(umem, page_size); - pbl_tbl = kcalloc(umem_pgs, sizeof(*pbl_tbl), GFP_KERNEL); - if (!pbl_tbl) { - rc = -ENOMEM; - goto free_umem; - } - - /* Map umem buf ptrs to the PBL */ - umem_pgs = fill_umem_pbl_tbl(umem, pbl_tbl, order_base_2(page_size)); - rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl, - umem_pgs, false, page_size); + rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, umem, + umem_pgs, page_size); if (rc) { ibdev_err(&rdev->ibdev, "Failed to register user MR"); - goto fail; + goto free_umem; } - kfree(pbl_tbl); - mr->ib_mr.lkey = mr->qplib_mr.lkey; mr->ib_mr.rkey = mr->qplib_mr.lkey; atomic_inc(&rdev->mr_count); return &mr->ib_mr; -fail: - kfree(pbl_tbl); free_umem: ib_umem_release(umem); free_mrw: diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 6316179583a6..049b3576302b 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -650,42 +650,32 @@ int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw, } int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, - u64 *pbl_tbl, int num_pbls, bool block, u32 buf_pg_size) + struct ib_umem *umem, int num_pbls, u32 buf_pg_size) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct bnxt_qplib_hwq_attr hwq_attr = {}; struct bnxt_qplib_sg_info sginfo = {}; struct creq_register_mr_resp resp; struct cmdq_register_mr req; - int pg_ptrs, pages, i, rc; u16 cmd_flags = 0, level; - dma_addr_t **pbl_ptr; + int pages, rc; u32 pg_size; if (num_pbls) { + pages = roundup_pow_of_two(num_pbls); /* Allocate memory for the non-leaf pages to store buf ptrs. * Non-leaf pages always uses system PAGE_SIZE */ - pg_ptrs = roundup_pow_of_two(num_pbls); - pages = pg_ptrs >> MAX_PBL_LVL_1_PGS_SHIFT; - if (!pages) - pages++; - - if (pages > MAX_PBL_LVL_1_PGS) { - dev_err(&res->pdev->dev, - "SP: Reg MR: pages requested (0x%x) exceeded max (0x%x)\n", - pages, MAX_PBL_LVL_1_PGS); - return -ENOMEM; - } /* Free the hwq if it already exist, must be a rereg */ if (mr->hwq.max_elements) bnxt_qplib_free_hwq(res, &mr->hwq); /* Use system PAGE_SIZE */ hwq_attr.res = res; hwq_attr.depth = pages; - hwq_attr.stride = PAGE_SIZE; + hwq_attr.stride = buf_pg_size; hwq_attr.type = HWQ_TYPE_MR; hwq_attr.sginfo = &sginfo; + hwq_attr.sginfo->umem = umem; hwq_attr.sginfo->npages = pages; hwq_attr.sginfo->pgsize = PAGE_SIZE; hwq_attr.sginfo->pgshft = PAGE_SHIFT; @@ -695,11 +685,6 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, "SP: Reg MR memory allocation failed\n"); return -ENOMEM; } - /* Write to the hwq */ - pbl_ptr = (dma_addr_t **)mr->hwq.pbl_ptr; - for (i = 0; i < num_pbls; i++) - pbl_ptr[PTR_PG(i)][PTR_IDX(i)] = - (pbl_tbl[i] & PAGE_MASK) | PTU_PTE_VALID; } RCFW_CMD_PREP(req, REGISTER_MR, cmd_flags); @@ -711,7 +696,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, req.pbl = 0; pg_size = PAGE_SIZE; } else { - level = mr->hwq.level + 1; + level = mr->hwq.level; req.pbl = cpu_to_le64(mr->hwq.pbl[PBL_LVL_0].pg_map_arr[0]); } pg_size = buf_pg_size ? 
buf_pg_size : PAGE_SIZE; @@ -728,7 +713,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, req.mr_size = cpu_to_le64(mr->total_size); rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, - (void *)&resp, NULL, block); + (void *)&resp, NULL, false); if (rc) goto fail; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 967890cd81f2..bc228340684f 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -254,7 +254,7 @@ int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw, bool block); int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, - u64 *pbl_tbl, int num_pbls, bool block, u32 buf_pg_size); + struct ib_umem *umem, int num_pbls, u32 buf_pg_size); int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr); int bnxt_qplib_alloc_fast_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, int max); diff --git a/drivers/infiniband/hw/cxgb4/restrack.c b/drivers/infiniband/hw/cxgb4/restrack.c index b32e6516d65f..ff645b955a08 100644 --- a/drivers/infiniband/hw/cxgb4/restrack.c +++ b/drivers/infiniband/hw/cxgb4/restrack.c @@ -209,7 +209,7 @@ int c4iw_fill_res_cm_id_entry(struct sk_buff *msg, epcp = (struct c4iw_ep_common *)iw_cm_id->provider_data; if (!epcp) return 0; - uep = kcalloc(1, sizeof(*uep), GFP_KERNEL); + uep = kzalloc(sizeof(*uep), GFP_KERNEL); if (!uep) return 0; diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index b199e4ac6cf9..fa38b34eddb8 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_ADMIN_CMDS_H_ @@ -161,8 +161,8 @@ struct efa_admin_create_qp_resp { u32 qp_handle; /* - * QP number in the given EFA virtual device. Least-significant bits - * (as needed according to max_qp) carry unique QP ID + * QP number in the given EFA virtual device. Least-significant bits (as + * needed according to max_qp) carry unique QP ID */ u16 qp_num; @@ -465,7 +465,7 @@ struct efa_admin_create_cq_cmd { /* * number of sub cqs - must be equal to sub_cqs_per_cq of queue - * attributes. + * attributes. 
*/ u16 num_sub_cqs; @@ -563,12 +563,8 @@ struct efa_admin_acq_get_stats_resp { }; struct efa_admin_get_set_feature_common_desc { - /* - * 1:0 : select - 0x1 - current value; 0x3 - default - * value - * 7:3 : reserved3 - MBZ - */ - u8 flags; + /* MBZ */ + u8 reserved0; /* as appears in efa_admin_aq_feature_id */ u8 feature_id; @@ -823,12 +819,6 @@ enum efa_admin_aenq_group { EFA_ADMIN_AENQ_GROUPS_NUM = 5, }; -enum efa_admin_aenq_notification_syndrom { - EFA_ADMIN_SUSPEND = 0, - EFA_ADMIN_RESUME = 1, - EFA_ADMIN_UPDATE_HINTS = 2, -}; - struct efa_admin_mmio_req_read_less_resp { u16 req_id; @@ -909,9 +899,6 @@ struct efa_admin_host_info { #define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6) #define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) -/* get_set_feature_common_desc */ -#define EFA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0) - /* feature_device_attr_desc */ #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0) #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK BIT(1) diff --git a/drivers/infiniband/hw/efa/efa_admin_defs.h b/drivers/infiniband/hw/efa/efa_admin_defs.h index 29d53ed63b3e..78ff9389ae25 100644 --- a/drivers/infiniband/hw/efa/efa_admin_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_defs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_ADMIN_H_ @@ -82,7 +82,7 @@ struct efa_admin_acq_common_desc { /* * indicates to the driver which AQ entry has been consumed by the - * device and could be reused + * device and could be reused */ u16 sq_head_indx; }; diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index 336bc2c57bb1..0d523ad736c7 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #include "efa_com.h" @@ -20,9 +20,6 @@ #define EFA_CTRL_MINOR 0 #define EFA_CTRL_SUB_MINOR 1 -#define EFA_DMA_ADDR_TO_UINT32_LOW(x) ((u32)((u64)(x))) -#define EFA_DMA_ADDR_TO_UINT32_HIGH(x) ((u32)(((u64)(x)) >> 32)) - enum efa_cmd_status { EFA_CMD_SUBMITTED, EFA_CMD_COMPLETED, @@ -33,8 +30,6 @@ struct efa_comp_ctx { struct efa_admin_acq_entry *user_cqe; u32 comp_size; enum efa_cmd_status status; - /* status from the device */ - u8 comp_status; u8 cmd_opcode; u8 occupied; }; @@ -140,8 +135,8 @@ static int efa_com_admin_init_sq(struct efa_com_dev *edev) sq->db_addr = (u32 __iomem *)(edev->reg_bar + EFA_REGS_AQ_PROD_DB_OFF); - addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(sq->dma_addr); - addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(sq->dma_addr); + addr_high = upper_32_bits(sq->dma_addr); + addr_low = lower_32_bits(sq->dma_addr); writel(addr_low, edev->reg_bar + EFA_REGS_AQ_BASE_LO_OFF); writel(addr_high, edev->reg_bar + EFA_REGS_AQ_BASE_HI_OFF); @@ -174,8 +169,8 @@ static int efa_com_admin_init_cq(struct efa_com_dev *edev) cq->cc = 0; cq->phase = 1; - addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(cq->dma_addr); - addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(cq->dma_addr); + addr_high = upper_32_bits(cq->dma_addr); + addr_low = lower_32_bits(cq->dma_addr); writel(addr_low, edev->reg_bar + EFA_REGS_ACQ_BASE_LO_OFF); writel(addr_high, edev->reg_bar + EFA_REGS_ACQ_BASE_HI_OFF); @@ -215,8 +210,8 @@ static int efa_com_admin_init_aenq(struct efa_com_dev *edev, aenq->cc = 0; aenq->phase = 1; - addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr); - addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr); + addr_low = lower_32_bits(aenq->dma_addr); + addr_high = upper_32_bits(aenq->dma_addr); writel(addr_low, edev->reg_bar + EFA_REGS_AENQ_BASE_LO_OFF); writel(addr_high, edev->reg_bar + EFA_REGS_AENQ_BASE_HI_OFF); @@ -421,9 +416,7 @@ static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *a } comp_ctx->status = EFA_CMD_COMPLETED; - comp_ctx->comp_status = cqe->acq_common_descriptor.status; - if (comp_ctx->user_cqe) - memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size); + memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size); if (!test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state)) complete(&comp_ctx->wait_event); @@ -521,7 +514,7 @@ static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_c msleep(aq->poll_interval); } - err = efa_com_comp_status_to_errno(comp_ctx->comp_status); + err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status); out: efa_com_put_comp_ctx(aq, comp_ctx); return err; @@ -569,7 +562,7 @@ static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *com goto out; } - err = efa_com_comp_status_to_errno(comp_ctx->comp_status); + err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status); out: efa_com_put_comp_ctx(aq, comp_ctx); return err; @@ -641,8 +634,8 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq, aq->efa_dev, "Failed to process command %s (opcode %u) comp_status %d err %d\n", efa_com_cmd_str(cmd->aq_common_descriptor.opcode), - cmd->aq_common_descriptor.opcode, comp_ctx->comp_status, - err); + cmd->aq_common_descriptor.opcode, + comp_ctx->user_cqe->acq_common_descriptor.status, err); atomic64_inc(&aq->stats.cmd_err); } @@ -795,7 +788,7 @@ err_destroy_comp_ctxt: * This method goes over the admin completion queue and wakes up * all the pending threads that wait on the commands wait event. * - * @note: Should be called after MSI-X interrupt. 
+ * Note: Should be called after MSI-X interrupt. */ void efa_com_admin_q_comp_intr_handler(struct efa_com_dev *edev) { diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index c87b94ea2939..993cbf37e0b9 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1323,8 +1323,8 @@ CNTR_ELEM(#name, \ /** * hfi_addr_from_offset - return addr for readq/writeq - * @dd - the dd device - * @offset - the offset of the CSR within bar0 + * @dd: the dd device + * @offset: the offset of the CSR within bar0 * * This routine selects the appropriate base address * based on the indicated offset. @@ -1340,8 +1340,8 @@ static inline void __iomem *hfi1_addr_from_offset( /** * read_csr - read CSR at the indicated offset - * @dd - the dd device - * @offset - the offset of the CSR within bar0 + * @dd: the dd device + * @offset: the offset of the CSR within bar0 * * Return: the value read or all FF's if there * is no mapping @@ -1355,9 +1355,9 @@ u64 read_csr(const struct hfi1_devdata *dd, u32 offset) /** * write_csr - write CSR at the indicated offset - * @dd - the dd device - * @offset - the offset of the CSR within bar0 - * @value - value to write + * @dd: the dd device + * @offset: the offset of the CSR within bar0 + * @value: value to write */ void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value) { @@ -1373,8 +1373,8 @@ void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value) /** * get_csr_addr - return te iomem address for offset - * @dd - the dd device - * @offset - the offset of the CSR within bar0 + * @dd: the dd device + * @offset: the offset of the CSR within bar0 * * Return: The iomem address to use in subsequent * writeq/readq operations. @@ -8433,7 +8433,7 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd) return hfi1_rcd_head(rcd) != tail; } -/** +/* * Common code for receive contexts interrupt handlers. * Update traces, increment kernel IRQ counter and * setup ASPM when needed. @@ -8447,7 +8447,7 @@ static void receive_interrupt_common(struct hfi1_ctxtdata *rcd) aspm_ctx_disable(rcd); } -/** +/* * __hfi1_rcd_eoi_intr() - Make HW issue receive interrupt * when there are packets present in the queue. When calling * with interrupts enabled please use hfi1_rcd_eoi_intr. @@ -8484,8 +8484,8 @@ static void hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd) /** * hfi1_netdev_rx_napi - napi poll function to move eoi inline - * @napi - pointer to napi object - * @budget - netdev budget + * @napi: pointer to napi object + * @budget: netdev budget */ int hfi1_netdev_rx_napi(struct napi_struct *napi, int budget) { @@ -10142,7 +10142,7 @@ u32 lrh_max_header_bytes(struct hfi1_devdata *dd) /* * Set Send Length - * @ppd - per port data + * @ppd: per port data * * Set the MTU by limiting how many DWs may be sent. The SendLenCheck* * registers compare against LRH.PktLen, so use the max bytes included @@ -14200,9 +14200,9 @@ u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx) /** * init_qpmap_table - * @dd - device data - * @first_ctxt - first context - * @last_ctxt - first context + * @dd: device data + * @first_ctxt: first context + * @last_ctxt: first context * * This return sets the qpn mapping table that * is indexed by qpn[8:1]. @@ -14383,8 +14383,8 @@ no_qos: /** * init_qos - init RX qos - * @dd - device data - * @rmt - RSM map table + * @dd: device data + * @rmt: RSM map table * * This routine initializes Rule 0 and the RSM map table to implement * quality of service (qos). 
@@ -15022,8 +15022,7 @@ err_exit: /** * hfi1_init_dd() - Initialize most of the dd structure. - * @dev: the pci_dev for hfi1_ib device - * @ent: pci_device_id struct for this dev + * @dd: the dd device * * This is global, and is called directly at init to set up the * chip-specific function pointers for later use. @@ -15378,10 +15377,11 @@ static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate, /** * create_pbc - build a pbc for transmission + * @ppd: info of physical Hfi port * @flags: special case flags or-ed in built pbc - * @srate: static rate + * @srate_mbs: static rate * @vl: vl - * @dwlen: dword length (header words + data words + pbc words) + * @dw_len: dword length (header words + data words + pbc words) * * Create a PBC with the given flags, rate, VL, and length. * diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c index e9d5cc8b771a..91f13140ddf2 100644 --- a/drivers/infiniband/hw/hfi1/exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/exp_rcv.c @@ -50,7 +50,7 @@ /** * exp_tid_group_init - initialize exp_tid_set - * @set - the set + * @set: the set */ static void hfi1_exp_tid_set_init(struct exp_tid_set *set) { @@ -60,7 +60,7 @@ static void hfi1_exp_tid_set_init(struct exp_tid_set *set) /** * hfi1_exp_tid_group_init - initialize rcd expected receive - * @rcd - the rcd + * @rcd: the rcd */ void hfi1_exp_tid_group_init(struct hfi1_ctxtdata *rcd) { @@ -71,7 +71,7 @@ void hfi1_exp_tid_group_init(struct hfi1_ctxtdata *rcd) /** * alloc_ctxt_rcv_groups - initialize expected receive groups - * @rcd - the context to add the groupings to + * @rcd: the context to add the groupings to */ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) { @@ -101,7 +101,7 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) /** * free_ctxt_rcv_groups - free expected receive groups - * @rcd - the context to free + * @rcd: the context to free * * The routine dismantles the expect receive linked * list and clears any tids associated with the receive diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 329ee4f48d95..3b7bbc7b9d10 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -1522,7 +1522,7 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit) * manage_rcvq - manage a context's receive queue * @uctxt: the context * @subctxt: the sub-context - * @start_stop: action to carry out + * @arg: start/stop action to carry out * * start_stop == 0 disables receive on the context, for use in queue * overflow conditions. 
start_stop==1 re-enables, to be used to diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index 387305b768e9..5ba5c11459e7 100644 --- a/drivers/infiniband/hw/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c @@ -91,9 +91,9 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd) /** * format_hwmsg - format a single hwerror message - * @msg message buffer - * @msgl length of message buffer - * @hwmsg message to add to message buffer + * @msg: message buffer + * @msgl: length of message buffer + * @hwmsg: message to add to message buffer */ static void format_hwmsg(char *msg, size_t msgl, const char *hwmsg) { @@ -104,11 +104,11 @@ static void format_hwmsg(char *msg, size_t msgl, const char *hwmsg) /** * hfi1_format_hwerrors - format hardware error messages for display - * @hwerrs hardware errors bit vector - * @hwerrmsgs hardware error descriptions - * @nhwerrmsgs number of hwerrmsgs - * @msg message buffer - * @msgl message buffer length + * @hwerrs: hardware errors bit vector + * @hwerrmsgs: hardware error descriptions + * @nhwerrmsgs: number of hwerrmsgs + * @msg: message buffer + * @msgl: message buffer length */ void hfi1_format_hwerrors(u64 hwerrs, const struct hfi1_hwerror_msgs *hwerrmsgs, size_t nhwerrmsgs, char *msg, size_t msgl) diff --git a/drivers/infiniband/hw/hfi1/iowait.c b/drivers/infiniband/hw/hfi1/iowait.c index 5836fe7b2817..111489802614 100644 --- a/drivers/infiniband/hw/hfi1/iowait.c +++ b/drivers/infiniband/hw/hfi1/iowait.c @@ -26,7 +26,7 @@ inline void iowait_clear_flag(struct iowait *wait, u32 flag) clear_bit(flag, &wait->flags); } -/** +/* * iowait_init() - initialize wait structure * @wait: wait struct to initialize * @tx_limit: limit for overflow queuing @@ -88,7 +88,7 @@ void iowait_cancel_work(struct iowait *w) /** * iowait_set_work_flag - set work flag based on leg - * @w - the iowait work struct + * @w: the iowait work struct */ int iowait_set_work_flag(struct iowait_work *w) { diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 3222e3acb79c..e2f2f7847aed 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1341,7 +1341,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, return 0; } -/** +/* * subn_set_opa_portinfo - set port information * @smp: the incoming SM packet * @ibdev: the infiniband device @@ -4902,6 +4902,8 @@ static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port, * @in_grh: the global route header for this packet * @in_mad: the incoming MAD * @out_mad: any outgoing MAD reply + * @out_mad_size: size of the outgoing MAD reply + * @out_mad_pkey_index: used to pass back the packet key index * * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not * interested in processing. diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c index d61ee853d215..cf3040bb177f 100644 --- a/drivers/infiniband/hw/hfi1/msix.c +++ b/drivers/infiniband/hw/hfi1/msix.c @@ -103,8 +103,8 @@ int msix_initialize(struct hfi1_devdata *dd) * @arg: context information for the IRQ * @handler: IRQ handler * @thread: IRQ thread handler (could be NULL) - * @idx: zero base idx if multiple devices are needed * @type: affinity IRQ type + * @name: IRQ name * * Allocated an MSIx vector if available, and then create the appropriate * meta data needed to keep track of the pci IRQ request.
diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c index 6d263c9749b3..1fb6e1a0e4e1 100644 --- a/drivers/infiniband/hw/hfi1/netdev_rx.c +++ b/drivers/infiniband/hw/hfi1/netdev_rx.c @@ -467,7 +467,7 @@ void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id) * hfi1_netdev_get_first_dat - Gets first entry with greater or equal id. * * @dd: hfi1 dev data - * @id: requested integer id up to INT_MAX + * @start_id: requested integer id up to INT_MAX */ void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id) { diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 18d32f053d26..6f06e9920503 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -334,7 +334,7 @@ int pcie_speeds(struct hfi1_devdata *dd) return 0; } -/** +/* * Restore command and BARs after a reset has wiped them out * * Returns 0 on success, otherwise a negative error value @@ -393,7 +393,7 @@ error: return pcibios_err_to_errno(ret); } -/** +/* * Save BARs and command to rewrite after device reset * * Returns 0 on success, otherwise a negative error value diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c index 4a4ec2397857..14bfd8287f4a 100644 --- a/drivers/infiniband/hw/hfi1/pio_copy.c +++ b/drivers/infiniband/hw/hfi1/pio_copy.c @@ -55,6 +55,7 @@ /** * pio_copy - copy data block to MMIO space + * @dd: hfi1 dev data * @pbuf: a number of blocks allocated within a PIO send context * @pbc: PBC to send * @from: source, must be 8 byte aligned diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 681bb4e918c9..e037df911512 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -186,7 +186,7 @@ static void flush_iowait(struct rvt_qp *qp) write_sequnlock_irqrestore(lock, flags); } -/** +/* * This function is what we would push to the core layer if we wanted to be a * "first class citizen". Instead we hide this here and rely on Verbs ULPs * to blindly pass the MTU enum value from the PathRecord to us. @@ -289,9 +289,9 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, /** * hfi1_setup_wqe - set up the wqe - * @qp - The qp - * @wqe - The built wqe - * @call_send - Determine if the send should be posted or scheduled. + * @qp: The qp + * @wqe: The built wqe + * @call_send: Determine if the send should be posted or scheduled. * * Perform setup of the wqe. This is called * prior to inserting the wqe into the ring but after @@ -595,7 +595,7 @@ struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5) return sde; } -/* +/** * qp_to_send_context - map a qp to a send context * @qp: the QP * @sc5: the 5 bit sc @@ -912,8 +912,8 @@ void notify_error_qp(struct rvt_qp *qp) /** * hfi1_qp_iter_cb - callback for iterator - * @qp - the qp - * @v - the sl in low bits of v + * @qp: the qp + * @v: the sl in low bits of v * * This is called from the iterator callback to work * on an individual qp. 
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index 8386c84c2d92..38f311f855b5 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c @@ -242,7 +242,7 @@ static int i2c_bus_write(struct hfi1_devdata *dd, struct hfi1_i2c_bus *i2c, msgs[0].buf = offset_bytes; msgs[1].addr = slave_addr; - msgs[1].flags = I2C_M_NOSTART, + msgs[1].flags = I2C_M_NOSTART; msgs[1].len = len; msgs[1].buf = data; break; @@ -290,7 +290,7 @@ static int i2c_bus_read(struct hfi1_devdata *dd, struct hfi1_i2c_bus *bus, msgs[0].buf = offset_bytes; msgs[1].addr = slave_addr; - msgs[1].flags = I2C_M_RD, + msgs[1].flags = I2C_M_RD; msgs[1].len = len; msgs[1].buf = data; break; diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 1bb5f57152d3..0174b8ee9f00 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -421,6 +421,7 @@ bail: /** * hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) * @qp: a pointer to the QP + * @ps: the current packet state * * Assumes s_lock is held. * @@ -1375,9 +1376,8 @@ static const hfi1_make_rc_ack hfi1_make_rc_ack_tbl[2] = { [HFI1_PKT_TYPE_16B] = &hfi1_make_rc_ack_16B }; -/** +/* * hfi1_send_rc_ack - Construct an ACK packet and send it - * @qp: a pointer to the QP * * This is called from hfi1_rc_rcv() and handle_receive_interrupt(). * Note that RDMA reads and atomics are handled in the @@ -1992,7 +1992,7 @@ static void update_qp_retry_state(struct rvt_qp *qp, u32 psn, u32 spsn, } } -/** +/* * do_rc_ack - process an incoming RC ACK * @qp: the QP the ACK came in on * @psn: the packet sequence number of the ACK @@ -2541,6 +2541,7 @@ static inline void rc_cancel_ack(struct rvt_qp *qp) * @opcode: the opcode for this packet * @psn: the packet sequence number for this packet * @diff: the difference between the PSN and the expected PSN + * @rcd: the receive context * * This is called from hfi1_rc_rcv() to process an unexpected * incoming RC packet for the given QP. diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 23ac6057b211..c3fa1814c6a8 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -260,6 +260,7 @@ static inline void hfi1_make_ruc_bth(struct rvt_qp *qp, * @qp: the queue pair * @ohdr: a pointer to the destination header memory * @bth0: bth0 passed in from the RC/UC builder + * @bth1: bth1 passed in from the RC/UC builder * @bth2: bth2 passed in from the RC/UC builder * @middle: non zero implies indicates ahg "could" be used * @ps: the current packet state @@ -348,6 +349,7 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, * @qp: the queue pair * @ohdr: a pointer to the destination header memory * @bth0: bth0 passed in from the RC/UC builder + * @bth1: bth1 passed in from the RC/UC builder * @bth2: bth2 passed in from the RC/UC builder * @middle: non zero implies indicates ahg "could" be used * @ps: the current packet state @@ -455,11 +457,10 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, /** * hfi1_schedule_send_yield - test for a yield required for QP * send engine - * @timeout: Final time for timeout slice for jiffies * @qp: a pointer to QP * @ps: a pointer to a structure with commonly lookup values for * the the send engine progress - * @tid - true if it is the tid leg + * @tid: true if it is the tid leg * * This routine checks if the time slice for the QP has expired * for RC QPs, if so an additional work entry is queued. 
At this diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index a307d4c8b15a..46b5290b2839 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1740,7 +1740,7 @@ retry: sane = (hwhead == swhead); if (unlikely(!sane)) { - dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%hu swhd=%hu swtl=%hu cnt=%hu\n", + dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%u swhd=%u swtl=%u cnt=%u\n", sde->this_idx, use_dmahead ? "dma" : "kreg", hwhead, swhead, swtail, cnt); @@ -2448,11 +2448,11 @@ nodesc: * @sde: sdma engine to use * @wait: SE wait structure to use when full (may be NULL) * @tx_list: list of sdma_txreqs to submit - * @count: pointer to a u16 which, after return will contain the total number of - * sdma_txreqs removed from the tx_list. This will include sdma_txreqs - * whose SDMA descriptors are submitted to the ring and the sdma_txreqs - * which are added to SDMA engine flush list if the SDMA engine state is - * not running. + * @count_out: pointer to a u16 which, after return will contain the total number of + * sdma_txreqs removed from the tx_list. This will include sdma_txreqs + * whose SDMA descriptors are submitted to the ring and the sdma_txreqs + * which are added to SDMA engine flush list if the SDMA engine state is + * not running. * * The call submits the list into the ring. * diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 92aa2a9b3b5a..0b1f9e4d038b 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -309,7 +309,8 @@ int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit) /** * qp_to_rcd - determine the receive context used by a qp - * @qp - the qp + * @rdi: rvt dev struct + * @qp: the qp * * This routine returns the receive context associated * with a a qp's qpn. @@ -484,6 +485,7 @@ static struct rvt_qp *first_qp(struct hfi1_ctxtdata *rcd, /** * kernel_tid_waiters - determine rcd wait * @rcd: the receive context + * @queue: the queue to operate on * @qp: the head of the qp being processed * * This routine will return false IFF @@ -517,7 +519,9 @@ static bool kernel_tid_waiters(struct hfi1_ctxtdata *rcd, /** * dequeue_tid_waiter - dequeue the qp from the list - * @qp - the qp to remove the wait list + * @rcd: the receive context + * @queue: the queue to operate on + * @qp: the qp to remove the wait list * * This routine removes the indicated qp from the * wait list if it is there. @@ -549,6 +553,7 @@ static void dequeue_tid_waiter(struct hfi1_ctxtdata *rcd, /** * queue_qp_for_tid_wait - suspend QP on tid space * @rcd: the receive context + * @queue: the queue to operate on * @qp: the qp * * The qp is inserted at the tail of the rcd @@ -593,7 +598,7 @@ static void __trigger_tid_waiter(struct rvt_qp *qp) /** * tid_rdma_schedule_tid_wakeup - schedule wakeup for a qp - * @qp - the qp + * @qp: the qp * * trigger a schedule or a waiting qp in a deadlock * safe manner. The qp reference is held prior @@ -630,7 +635,7 @@ static void tid_rdma_schedule_tid_wakeup(struct rvt_qp *qp) /** * tid_rdma_trigger_resume - field a trigger work request - * @work - the work item + * @work: the work item * * Complete the off qp trigger processing by directly * calling the progress routine. 
@@ -654,7 +659,7 @@ static void tid_rdma_trigger_resume(struct work_struct *work) rvt_put_qp(qp); } -/** +/* * tid_rdma_flush_wait - unwind any tid space wait * * This is called when resetting a qp to @@ -693,8 +698,8 @@ void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp) /* Flow functions */ /** * kern_reserve_flow - allocate a hardware flow - * @rcd - the context to use for allocation - * @last - the index of the preferred flow. Use RXE_NUM_TID_FLOWS to + * @rcd: the context to use for allocation + * @last: the index of the preferred flow. Use RXE_NUM_TID_FLOWS to * signify "don't care". * * Use a bit mask based allocation to reserve a hardware @@ -860,9 +865,10 @@ static u8 trdma_pset_order(struct tid_rdma_pageset *s) /** * tid_rdma_find_phys_blocks_4k - get groups based on mr info - * @npages - number of pages - * @pages - pointer to an array of page structs - * @list - page set array to return + * @flow: overall info for a TID RDMA segment + * @pages: pointer to an array of page structs + * @npages: number of pages + * @list: page set array to return * * This routine returns the number of groups associated with * the current sge information. This implementation is based @@ -949,10 +955,10 @@ static u32 tid_rdma_find_phys_blocks_4k(struct tid_rdma_flow *flow, /** * tid_flush_pages - dump out pages into pagesets - * @list - list of pagesets - * @idx - pointer to current page index - * @pages - number of pages to dump - * @sets - current number of pagesset + * @list: list of pagesets + * @idx: pointer to current page index + * @pages: number of pages to dump + * @sets: current number of pagesets * * This routine flushes out accumulated pages. * @@ -990,9 +996,10 @@ static u32 tid_flush_pages(struct tid_rdma_pageset *list, /** * tid_rdma_find_phys_blocks_8k - get groups based on mr info - * @pages - pointer to an array of page structs - * @npages - number of pages - * @list - page set array to return + * @flow: overall info for a TID RDMA segment + * @pages: pointer to an array of page structs + * @npages: number of pages + * @list: page set array to return * * This routine parses an array of pages to compute pagesets * in an 8k compatible way. @@ -1064,7 +1071,7 @@ static u32 tid_rdma_find_phys_blocks_8k(struct tid_rdma_flow *flow, return sets; } -/** +/* * Find pages for one segment of a sge array represented by @ss. The function * does not check the sge, the sge must have been checked for alignment with a * prior call to hfi1_kern_trdma_ok. Other sge checking is done as part of @@ -1598,7 +1605,7 @@ void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req) /** * hfi1_kern_exp_rcv_free_flows - free previously allocated flow information - * @req - the tid rdma request to be cleaned + * @req: the tid rdma request to be cleaned */ static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req) { @@ -3435,7 +3442,7 @@ static u32 hfi1_compute_tid_rnr_timeout(struct rvt_qp *qp, u32 to_seg) return 0; } -/** +/* * Central place for resource allocation at TID write responder, * is called from write_req and write_data interrupt handlers as * well as the send thread when a queued QP is scheduled for diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 1fb918399da0..5b0f536b34e0 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -55,6 +55,7 @@ /** * hfi1_make_uc_req - construct a request packet (SEND, RDMA write) * @qp: a pointer to the QP + * @ps: the current packet state * * Assume s_lock is held.
* @@ -291,12 +292,7 @@ bail_no_tx: /** * hfi1_uc_rcv - handle an incoming UC packet - * @ibp: the port the packet came in on - * @hdr: the header of the packet - * @rcv_flags: flags relevant to rcv processing - * @data: the packet data - * @tlen: the length of the packet - * @qp: the QP for this packet. + * @packet: the packet structure * * This is called from qp_rcv() to process an incoming UC packet * for the given QP. diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index e804af71b629..6ecb984c85fa 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -468,6 +468,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, /** * hfi1_make_ud_req - construct a UD request packet * @qp: the QP + * @ps: the current packet state * * Assume s_lock is held. * @@ -840,12 +841,7 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, /** * hfi1_ud_rcv - receive an incoming UD packet - * @ibp: the port the packet came in on - * @hdr: the packet header - * @rcv_flags: flags relevant to rcv processing - * @data: the packet data - * @tlen: the packet length - * @qp: the QP the packet came on + * @packet: the packet structure * * This is called from qp_rcv() to process an incoming UD packet * for the given QP. diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index b94fc7fd75a9..58dcab2679d9 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -154,12 +154,12 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) fd->entry_to_rb = NULL; } -/** +/* * Release pinned receive buffer pages. * - * @mapped - true if the pages have been DMA mapped. false otherwise. - * @idx - Index of the first page to unpin. - * @npages - No of pages to unpin. + * @mapped: true if the pages have been DMA mapped. false otherwise. + * @idx: Index of the first page to unpin. + * @npages: No of pages to unpin. * * If the pages have been DMA mapped (indicated by mapped parameter), their * info will be passed via a struct tid_rb_node. If they haven't been mapped, @@ -189,7 +189,7 @@ static void unpin_rcv_pages(struct hfi1_filedata *fd, fd->tid_n_pinned -= npages; } -/** +/* * Pin receive buffer pages. */ static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf) diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 3591923abebb..0dd4bb0a5a7e 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -729,7 +729,7 @@ bail_txadd: /** * update_tx_opstats - record stats by opcode - * @qp; the qp + * @qp: the qp * @ps: transmit packet state * @plen: the plen in dwords * @@ -1145,7 +1145,7 @@ static inline int egress_pkey_matches_entry(u16 pkey, u16 ent) * egress_pkey_check - check P_KEY of a packet * @ppd: Physical IB port data * @slid: SLID for packet - * @bkey: PKEY for header + * @pkey: PKEY for header * @sc5: SC for packet * @s_pkey_index: It will be used for look up optimization for kernel contexts * only. 
If it is negative value, then it means user contexts is calling this @@ -1206,7 +1206,7 @@ bad: return 1; } -/** +/* * get_send_routine - choose an egress routine * * Choose an egress routine based on QP type diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index 5afee04fb02c..23c438cef40d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -32,6 +32,7 @@ #ifndef _HNS_ROCE_COMMON_H #define _HNS_ROCE_COMMON_H +#include <linux/bitfield.h> #define roce_write(dev, reg, val) writel((val), (dev)->reg_base + (reg)) #define roce_read(dev, reg) readl((dev)->reg_base + (reg)) @@ -65,6 +66,27 @@ #define hr_reg_enable(ptr, field) _hr_reg_enable(ptr, field) +#define _hr_reg_clear(ptr, field_type, field_h, field_l) \ + ({ \ + const field_type *_ptr = ptr; \ + *((__le32 *)_ptr + (field_h) / 32) &= \ + cpu_to_le32( \ + ~GENMASK((field_h) % 32, (field_l) % 32)) + \ + BUILD_BUG_ON_ZERO(((field_h) / 32) != \ + ((field_l) / 32)); \ + }) + +#define hr_reg_clear(ptr, field) _hr_reg_clear(ptr, field) + +#define _hr_reg_write(ptr, field_type, field_h, field_l, val) \ + ({ \ + _hr_reg_clear(ptr, field_type, field_h, field_l); \ + *((__le32 *)ptr + (field_h) / 32) |= cpu_to_le32(FIELD_PREP( \ + GENMASK((field_h) % 32, (field_l) % 32), val)); \ + }) + +#define hr_reg_write(ptr, field, val) _hr_reg_write(ptr, field, val) + #define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3 #define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4 @@ -342,8 +364,8 @@ #define ROCEE_TX_CMQ_BASEADDR_L_REG 0x07000 #define ROCEE_TX_CMQ_BASEADDR_H_REG 0x07004 #define ROCEE_TX_CMQ_DEPTH_REG 0x07008 -#define ROCEE_TX_CMQ_TAIL_REG 0x07010 -#define ROCEE_TX_CMQ_HEAD_REG 0x07014 +#define ROCEE_TX_CMQ_HEAD_REG 0x07010 +#define ROCEE_TX_CMQ_TAIL_REG 0x07014 #define ROCEE_RX_CMQ_BASEADDR_L_REG 0x07018 #define ROCEE_RX_CMQ_BASEADDR_H_REG 0x0701c diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 8533fc2d8df2..74fc4940b03a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -38,11 +38,74 @@ #include "hns_roce_hem.h" #include "hns_roce_common.h" +static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank) +{ + u32 least_load = bank[0].inuse; + u8 bankid = 0; + u32 bankcnt; + u8 i; + + for (i = 1; i < HNS_ROCE_CQ_BANK_NUM; i++) { + bankcnt = bank[i].inuse; + if (bankcnt < least_load) { + least_load = bankcnt; + bankid = i; + } + } + + return bankid; +} + +static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) +{ + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + struct hns_roce_bank *bank; + u8 bankid; + int id; + + mutex_lock(&cq_table->bank_mutex); + bankid = get_least_load_bankid_for_cq(cq_table->bank); + bank = &cq_table->bank[bankid]; + + id = ida_alloc_range(&bank->ida, bank->min, bank->max, GFP_KERNEL); + if (id < 0) { + mutex_unlock(&cq_table->bank_mutex); + return id; + } + + /* the lower 2 bits is bankid */ + hr_cq->cqn = (id << CQ_BANKID_SHIFT) | bankid; + bank->inuse++; + mutex_unlock(&cq_table->bank_mutex); + + return 0; +} + +static inline u8 get_cq_bankid(unsigned long cqn) +{ + /* The lower 2 bits of CQN are used to hash to different banks */ + return (u8)(cqn & GENMASK(1, 0)); +} + +static void free_cqn(struct hns_roce_dev *hr_dev, unsigned long cqn) +{ + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + struct hns_roce_bank *bank; + + bank = &cq_table->bank[get_cq_bankid(cqn)]; + + ida_free(&bank->ida, 
cqn >> CQ_BANKID_SHIFT); + + mutex_lock(&cq_table->bank_mutex); + bank->inuse--; + mutex_unlock(&cq_table->bank_mutex); +} + static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_cmd_mailbox *mailbox; - struct hns_roce_cq_table *cq_table; u64 mtts[MTT_MIN_COUNT] = { 0 }; dma_addr_t dma_handle; int ret; @@ -54,13 +117,6 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) return -EINVAL; } - cq_table = &hr_dev->cq_table; - ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn); - if (ret) { - ibdev_err(ibdev, "failed to alloc CQ bitmap, ret = %d.\n", ret); - return ret; - } - /* Get CQC memory HEM(Hardware Entry Memory) table */ ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn); if (ret) { @@ -110,7 +166,6 @@ err_put: hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn); err_out: - hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR); return ret; } @@ -138,7 +193,6 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) wait_for_completion(&hr_cq->free); hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn); - hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR); } static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, @@ -152,7 +206,6 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size; buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num; buf_attr.region_count = 1; - buf_attr.fixed_page = true; ret = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr, hr_dev->caps.cqe_ba_pg_sz + HNS_HW_PAGE_SHIFT, @@ -298,11 +351,17 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, goto err_cq_buf; } + ret = alloc_cqn(hr_dev, hr_cq); + if (ret) { + ibdev_err(ibdev, "failed to alloc CQN, ret = %d.\n", ret); + goto err_cq_db; + } + ret = alloc_cqc(hr_dev, hr_cq); if (ret) { ibdev_err(ibdev, "failed to alloc CQ context, ret = %d.\n", ret); - goto err_cq_db; + goto err_cqn; } /* @@ -326,6 +385,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, err_cqc: free_cqc(hr_dev, hr_cq); +err_cqn: + free_cqn(hr_dev, hr_cq->cqn); err_cq_db: free_cq_db(hr_dev, hr_cq, udata); err_cq_buf: @@ -341,9 +402,11 @@ int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) if (hr_dev->hw->destroy_cq) hr_dev->hw->destroy_cq(ib_cq, udata); - free_cq_buf(hr_dev, hr_cq); - free_cq_db(hr_dev, hr_cq, udata); free_cqc(hr_dev, hr_cq); + free_cqn(hr_dev, hr_cq->cqn); + free_cq_db(hr_dev, hr_cq, udata); + free_cq_buf(hr_dev, hr_cq); + return 0; } @@ -402,18 +465,33 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type) complete(&hr_cq->free); } -int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev) +void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + unsigned int reserved_from_bot; + unsigned int i; + mutex_init(&cq_table->bank_mutex); xa_init(&cq_table->array); - return hns_roce_bitmap_init(&cq_table->bitmap, hr_dev->caps.num_cqs, - hr_dev->caps.num_cqs - 1, - hr_dev->caps.reserved_cqs, 0); + reserved_from_bot = hr_dev->caps.reserved_cqs; + + for (i = 0; i < reserved_from_bot; i++) { + cq_table->bank[get_cq_bankid(i)].inuse++; + cq_table->bank[get_cq_bankid(i)].min++; + } + + for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++) { 
+ ida_init(&cq_table->bank[i].ida); + cq_table->bank[i].max = hr_dev->caps.num_cqs / + HNS_ROCE_CQ_BANK_NUM - 1; + } } void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev) { - hns_roce_bitmap_cleanup(&hr_dev->cq_table.bitmap); + int i; + + for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++) + ida_destroy(&hr_dev->cq_table.bank[i].ida); } diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index ad8253245a85..3d6b7a2db496 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -54,6 +54,7 @@ /* Hardware specification only for v1 engine */ #define HNS_ROCE_MIN_CQE_NUM 0x40 #define HNS_ROCE_MIN_WQE_NUM 0x20 +#define HNS_ROCE_MIN_SRQ_WQE_NUM 1 /* Hardware specification only for v1 engine */ #define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7 @@ -65,6 +66,8 @@ #define HNS_ROCE_CQE_WCMD_EMPTY_BIT 0x2 #define HNS_ROCE_MIN_CQE_CNT 16 +#define HNS_ROCE_RESERVED_SGE 1 + #define HNS_ROCE_MAX_IRQ_NUM 128 #define HNS_ROCE_SGE_IN_WQE 2 @@ -90,6 +93,7 @@ #define HNS_ROCE_MAX_PORTS 6 #define HNS_ROCE_GID_SIZE 16 #define HNS_ROCE_SGE_SIZE 16 +#define HNS_ROCE_DWQE_SIZE 65536 #define HNS_ROCE_HOP_NUM_0 0xff @@ -119,6 +123,9 @@ #define SRQ_DB_REG 0x230 #define HNS_ROCE_QP_BANK_NUM 8 +#define HNS_ROCE_CQ_BANK_NUM 4 + +#define CQ_BANKID_SHIFT 2 /* The chip implementation of the consumer index is calculated * according to twice the actual EQ depth @@ -163,44 +170,6 @@ enum hns_roce_event { HNS_ROCE_EVENT_TYPE_FLR = 0x15, }; -/* Local Work Queue Catastrophic Error,SUBTYPE 0x5 */ -enum { - HNS_ROCE_LWQCE_QPC_ERROR = 1, - HNS_ROCE_LWQCE_MTU_ERROR = 2, - HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR = 3, - HNS_ROCE_LWQCE_WQE_ADDR_ERROR = 4, - HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR = 5, - HNS_ROCE_LWQCE_SL_ERROR = 6, - HNS_ROCE_LWQCE_PORT_ERROR = 7, -}; - -/* Local Access Violation Work Queue Error,SUBTYPE 0x7 */ -enum { - HNS_ROCE_LAVWQE_R_KEY_VIOLATION = 1, - HNS_ROCE_LAVWQE_LENGTH_ERROR = 2, - HNS_ROCE_LAVWQE_VA_ERROR = 3, - HNS_ROCE_LAVWQE_PD_ERROR = 4, - HNS_ROCE_LAVWQE_RW_ACC_ERROR = 5, - HNS_ROCE_LAVWQE_KEY_STATE_ERROR = 6, - HNS_ROCE_LAVWQE_MR_OPERATION_ERROR = 7, -}; - -/* DOORBELL overflow subtype */ -enum { - HNS_ROCE_DB_SUBTYPE_SDB_OVF = 1, - HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF = 2, - HNS_ROCE_DB_SUBTYPE_ODB_OVF = 3, - HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF = 4, - HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP = 5, - HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP = 6, -}; - -enum { - /* RQ&SRQ related operations */ - HNS_ROCE_OPCODE_SEND_DATA_RECEIVE = 0x06, - HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE = 0x07, -}; - #define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12 enum { @@ -253,9 +222,6 @@ enum { #define HNS_ROCE_CMD_SUCCESS 1 -#define HNS_ROCE_PORT_DOWN 0 -#define HNS_ROCE_PORT_UP 1 - /* The minimum page size is 4K for hardware */ #define HNS_HW_PAGE_SHIFT 12 #define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT) @@ -332,7 +298,6 @@ struct hns_roce_buf_attr { } region[HNS_ROCE_MAX_BT_REGION]; unsigned int region_count; /* valid region count */ unsigned int page_shift; /* buffer page shift */ - bool fixed_page; /* decide page shift is fixed-size or maximum size */ unsigned int user_access; /* umem access flag */ bool mtt_only; /* only alloc buffer-required MTT memory */ }; @@ -393,6 +358,7 @@ struct hns_roce_wq { spinlock_t lock; u32 wqe_cnt; /* WQE num */ u32 max_gs; + u32 rsv_sge; int offset; int wqe_shift; /* WQE size */ u32 head; @@ -489,6 +455,8 @@ struct hns_roce_idx_que { struct hns_roce_mtr mtr; int entry_shift; unsigned long *bitmap; + u32 head; + u32 tail; }; struct hns_roce_srq { 
@@ -496,7 +464,9 @@ struct hns_roce_srq { unsigned long srqn; u32 wqe_cnt; int max_gs; + u32 rsv_sge; int wqe_shift; + u32 cqn; void __iomem *db_reg_l; atomic_t refcount; @@ -507,8 +477,6 @@ struct hns_roce_srq { u64 *wrid; struct hns_roce_idx_que idx_que; spinlock_t lock; - u16 head; - u16 tail; struct mutex mutex; void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event); }; @@ -536,9 +504,10 @@ struct hns_roce_qp_table { }; struct hns_roce_cq_table { - struct hns_roce_bitmap bitmap; struct xarray array; struct hns_roce_hem_table table; + struct hns_roce_bank bank[HNS_ROCE_CQ_BANK_NUM]; + struct mutex bank_mutex; }; struct hns_roce_srq_table { @@ -640,6 +609,10 @@ struct hns_roce_work { u32 queue_num; }; +enum { + HNS_ROCE_QP_CAP_DIRECT_WQE = BIT(5), +}; + struct hns_roce_qp { struct ib_qp ibqp; struct hns_roce_wq rq; @@ -647,7 +620,7 @@ struct hns_roce_qp { struct hns_roce_db sdb; unsigned long en_flags; u32 doorbell_qpn; - u32 sq_signal_bits; + enum ib_sig_type sq_signal_bits; struct hns_roce_wq sq; struct hns_roce_mtr mtr; @@ -779,7 +752,7 @@ struct hns_roce_caps { u32 max_cqes; u32 min_cqes; u32 min_wqes; - int reserved_cqs; + u32 reserved_cqs; int reserved_srqs; int num_aeq_vectors; int num_comp_vectors; @@ -911,8 +884,7 @@ struct hns_roce_hw { int (*write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf, struct hns_roce_mr *mr, unsigned long mtpt_idx); int (*rereg_write_mtpt)(struct hns_roce_dev *hr_dev, - struct hns_roce_mr *mr, int flags, u32 pdn, - int mr_access_flags, u64 iova, u64 size, + struct hns_roce_mr *mr, int flags, void *mb_buf); int (*frmr_write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf, struct hns_roce_mr *mr); @@ -945,11 +917,7 @@ struct hns_roce_hw { int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); int (*init_eq)(struct hns_roce_dev *hr_dev); void (*cleanup_eq)(struct hns_roce_dev *hr_dev); - void (*write_srqc)(struct hns_roce_dev *hr_dev, - struct hns_roce_srq *srq, u32 pdn, u16 xrcd, u32 cqn, - void *mb_buf, u64 *mtts_wqe, u64 *mtts_idx, - dma_addr_t dma_handle_wqe, - dma_addr_t dma_handle_idx); + int (*write_srqc)(struct hns_roce_srq *srq, void *mb_buf); int (*modify_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); @@ -982,6 +950,7 @@ struct hns_roce_dev { struct mutex pgdir_mutex; int irq[HNS_ROCE_MAX_IRQ_NUM]; u8 __iomem *reg_base; + void __iomem *mem_base; struct hns_roce_caps caps; struct xarray qp_table_xa; @@ -1067,7 +1036,7 @@ static inline struct hns_roce_srq *to_hr_srq(struct ib_srq *ibsrq) static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest) { - __raw_writeq(*(u64 *) val, dest); + writeq(*(u64 *)val, dest); } static inline struct hns_roce_qp @@ -1164,7 +1133,7 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev); int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev); -int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev); +void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev); int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev); int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev); @@ -1281,7 +1250,6 @@ u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); - int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq); #endif /* _HNS_ROCE_DEVICE_H */ 
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index edc9d6b98d95..cfd2e1b60c7f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -1075,9 +1075,8 @@ static struct roce_hem_item *hem_list_alloc_item(struct hns_roce_dev *hr_dev, return NULL; if (exist_bt) { - hem->addr = dma_alloc_coherent(hr_dev->dev, - count * BA_BYTE_LEN, - &hem->dma_addr, GFP_KERNEL); + hem->addr = dma_alloc_coherent(hr_dev->dev, count * BA_BYTE_LEN, + &hem->dma_addr, GFP_KERNEL); if (!hem->addr) { kfree(hem); return NULL; @@ -1336,6 +1335,10 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, if (ba_num < 1) return -ENOMEM; + if (ba_num > unit) + return -ENOBUFS; + + ba_num = min_t(int, ba_num, unit); INIT_LIST_HEAD(&temp_root); offset = r->offset; /* indicate to last region */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index f68585ff8e8a..5346fdca9473 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -43,6 +43,22 @@ #include "hns_roce_hem.h" #include "hns_roce_hw_v1.h" +/** + * hns_get_gid_index - Get gid index. + * @hr_dev: pointer to structure hns_roce_dev. + * @port: port, value range: 0 ~ MAX + * @gid_index: gid_index, value range: 0 ~ MAX + * Description: + * N ports shared gids, allocation method as follow: + * GID[0][0], GID[1][0],.....GID[N - 1][0], + * GID[0][0], GID[1][0],.....GID[N - 1][0], + * And so on + */ +u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) +{ + return gid_index * hr_dev->caps.num_ports + port; +} + static void set_data_seg(struct hns_roce_wqe_data_seg *dseg, struct ib_sge *sg) { dseg->lkey = cpu_to_le32(sg->lkey); @@ -314,8 +330,6 @@ out: /* Set DB return */ if (likely(nreq)) { qp->sq.head += nreq; - /* Memory barrier */ - wmb(); roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SQ_HEAD_M, SQ_DOORBELL_U32_4_SQ_HEAD_S, @@ -395,8 +409,6 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, out: if (likely(nreq)) { hr_qp->rq.head += nreq; - /* Memory barrier */ - wmb(); if (ibqp->qp_type == IB_QPT_GSI) { __le32 tmp; @@ -1391,7 +1403,7 @@ static void hns_roce_free_mr_free(struct hns_roce_dev *hr_dev) /** * hns_roce_v1_reset - reset RoCE * @hr_dev: RoCE device struct pointer - * @enable: true -- drop reset, false -- reset + * @dereset: true -- drop reset, false -- reset * return 0 - success , negative --fail */ static int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset) @@ -1968,12 +1980,6 @@ static void __hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, if (nfreed) { hr_cq->cons_index += nfreed; - /* - * Make sure update of buffer contents is done before - * updating consumer index. 
- */ - wmb(); - hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index); } } @@ -2314,8 +2320,6 @@ int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) *hr_cq->tptr_addr = hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1); - /* Memroy barrier */ - wmb(); hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index); } @@ -3204,9 +3208,6 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, * need to hw to flash RQ HEAD by DB again */ if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { - /* Memory barrier */ - wmb(); - roce_set_field(doorbell[0], RQ_DOORBELL_U32_4_RQ_HEAD_M, RQ_DOORBELL_U32_4_RQ_HEAD_S, hr_qp->rq.head); roce_set_field(doorbell[1], RQ_DOORBELL_U32_8_QPN_M, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index 46ab0a321d21..84383236e47d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -193,6 +193,49 @@ #define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S 0 #define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M GENMASK(4, 0) +/* Local Work Queue Catastrophic Error,SUBTYPE 0x5 */ +enum { + HNS_ROCE_LWQCE_QPC_ERROR = 1, + HNS_ROCE_LWQCE_MTU_ERROR, + HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR, + HNS_ROCE_LWQCE_WQE_ADDR_ERROR, + HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR, + HNS_ROCE_LWQCE_SL_ERROR, + HNS_ROCE_LWQCE_PORT_ERROR, +}; + +/* Local Access Violation Work Queue Error,SUBTYPE 0x7 */ +enum { + HNS_ROCE_LAVWQE_R_KEY_VIOLATION = 1, + HNS_ROCE_LAVWQE_LENGTH_ERROR, + HNS_ROCE_LAVWQE_VA_ERROR, + HNS_ROCE_LAVWQE_PD_ERROR, + HNS_ROCE_LAVWQE_RW_ACC_ERROR, + HNS_ROCE_LAVWQE_KEY_STATE_ERROR, + HNS_ROCE_LAVWQE_MR_OPERATION_ERROR, +}; + +/* DOORBELL overflow subtype */ +enum { + HNS_ROCE_DB_SUBTYPE_SDB_OVF = 1, + HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF, + HNS_ROCE_DB_SUBTYPE_ODB_OVF, + HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF, + HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP, + HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP, +}; + +enum { + /* RQ&SRQ related operations */ + HNS_ROCE_OPCODE_SEND_DATA_RECEIVE = 0x06, + HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE, +}; + +enum { + HNS_ROCE_PORT_DOWN = 0, + HNS_ROCE_PORT_UP, +}; + struct hns_roce_cq_context { __le32 cqc_byte_4; __le32 cq_bt_l; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 833e1f259936..c3934abeb260 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -48,8 +48,8 @@ #include "hns_roce_hem.h" #include "hns_roce_hw_v2.h" -static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg, - struct ib_sge *sg) +static inline void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg, + struct ib_sge *sg) { dseg->lkey = cpu_to_le32(sg->lkey); dseg->addr = cpu_to_le64(sg->addr); @@ -99,16 +99,16 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, u64 pbl_ba; /* use ib_access_flags */ - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S, - wr->access & IB_ACCESS_MW_BIND ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S, - wr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_RR_S, - wr->access & IB_ACCESS_REMOTE_READ ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_RW_S, - wr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_LW_S, - wr->access & IB_ACCESS_LOCAL_WRITE ? 
1 : 0); + roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_BIND_EN_S, + !!(wr->access & IB_ACCESS_MW_BIND)); + roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_ATOMIC_S, + !!(wr->access & IB_ACCESS_REMOTE_ATOMIC)); + roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_RR_S, + !!(wr->access & IB_ACCESS_REMOTE_READ)); + roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_RW_S, + !!(wr->access & IB_ACCESS_REMOTE_WRITE)); + roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_LW_S, + !!(wr->access & IB_ACCESS_LOCAL_WRITE)); /* Data structure reuse may lead to confusion */ pbl_ba = mr->pbl_mtr.hem_cfg.root_ba; @@ -121,12 +121,10 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, rc_sq_wqe->va = cpu_to_le64(wr->mr->iova); fseg->pbl_size = cpu_to_le32(mr->npages); - roce_set_field(fseg->mode_buf_pg_sz, - V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M, + roce_set_field(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M, V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S, to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); - roce_set_bit(fseg->mode_buf_pg_sz, - V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0); + roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0); } static void set_atomic_seg(const struct ib_send_wr *wr, @@ -361,7 +359,7 @@ static int check_send_valid(struct hns_roce_dev *hr_dev, } else if (unlikely(hr_qp->state == IB_QPS_RESET || hr_qp->state == IB_QPS_INIT || hr_qp->state == IB_QPS_RTR)) { - ibdev_err(ibdev, "failed to post WQE, QP state %hhu!\n", + ibdev_err(ibdev, "failed to post WQE, QP state %u!\n", hr_qp->state); return -EINVAL; } else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) { @@ -469,7 +467,6 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, int ret; valid_num_sge = calc_wr_sge_num(wr, &msg_len); - memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe)); ret = set_ud_opcode(ud_sq_wqe, wr); if (WARN_ON(ret)) @@ -503,6 +500,8 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, if (ret) return ret; + qp->sl = to_hr_ah(ud_wr(wr)->ah)->av.sl; + set_extend_sge(qp, wr->sg_list, &curr_idx, valid_num_sge); /* @@ -521,10 +520,12 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, return 0; } -static int set_rc_opcode(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, +static int set_rc_opcode(struct hns_roce_dev *hr_dev, + struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, const struct ib_send_wr *wr) { u32 ib_op = wr->opcode; + int ret = 0; rc_sq_wqe->immtdata = get_immtdata(wr); @@ -544,7 +545,10 @@ static int set_rc_opcode(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, rc_sq_wqe->va = cpu_to_le64(atomic_wr(wr)->remote_addr); break; case IB_WR_REG_MR: - set_frmr_seg(rc_sq_wqe, reg_wr(wr)); + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + set_frmr_seg(rc_sq_wqe, reg_wr(wr)); + else + ret = -EOPNOTSUPP; break; case IB_WR_LOCAL_INV: roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1); @@ -553,19 +557,23 @@ static int set_rc_opcode(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey); break; default: - return -EINVAL; + ret = -EINVAL; } + if (unlikely(ret)) + return ret; + roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M, V2_RC_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op)); - return 0; + return ret; } static inline int set_rc_wqe(struct hns_roce_qp *qp, const struct ib_send_wr *wr, void *wqe, unsigned int *sge_idx, unsigned int owner_bit) { + struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device); struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe; unsigned int curr_idx = *sge_idx; unsigned 
int valid_num_sge; @@ -573,11 +581,10 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, int ret; valid_num_sge = calc_wr_sge_num(wr, &msg_len); - memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe)); rc_sq_wqe->msg_len = cpu_to_le32(msg_len); - ret = set_rc_opcode(rc_sq_wqe, wr); + ret = set_rc_opcode(hr_dev, rc_sq_wqe, wr); if (WARN_ON(ret)) return ret; @@ -635,6 +642,8 @@ static inline void update_sq_db(struct hns_roce_dev *hr_dev, V2_DB_BYTE_4_TAG_S, qp->doorbell_qpn); roce_set_field(sq_db.byte_4, V2_DB_BYTE_4_CMD_M, V2_DB_BYTE_4_CMD_S, HNS_ROCE_V2_SQ_DB); + /* indicates data on new BAR, 0 : SQ doorbell, 1 : DWQE */ + roce_set_bit(sq_db.byte_4, V2_DB_FLAG_S, 0); roce_set_field(sq_db.parameter, V2_DB_PARAMETER_IDX_M, V2_DB_PARAMETER_IDX_S, qp->sq.head); roce_set_field(sq_db.parameter, V2_DB_PARAMETER_SL_M, @@ -644,6 +653,38 @@ static inline void update_sq_db(struct hns_roce_dev *hr_dev, } } +static void hns_roce_write512(struct hns_roce_dev *hr_dev, u64 *val, + u64 __iomem *dest) +{ +#define HNS_ROCE_WRITE_TIMES 8 + struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hnae3_handle *handle = priv->handle; + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + int i; + + if (!hr_dev->dis_db && !ops->get_hw_reset_stat(handle)) + for (i = 0; i < HNS_ROCE_WRITE_TIMES; i++) + writeq_relaxed(*(val + i), dest + i); +} + +static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, + void *wqe) +{ + struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe; + + /* All kinds of DirectWQE have the same header field layout */ + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FLAG_S, 1); + roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M, + V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S, qp->sl); + roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M, + V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S, qp->sl >> 2); + roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M, + V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head); + + hns_roce_write512(hr_dev, wqe, hr_dev->mem_base + + HNS_ROCE_DWQE_SIZE * qp->ibqp.qp_num); +} + static int hns_roce_v2_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) @@ -708,9 +749,12 @@ out: if (likely(nreq)) { qp->sq.head += nreq; qp->next_sge = sge_idx; - /* Memory barrier */ - wmb(); - update_sq_db(hr_dev, qp); + + if (nreq == 1 && qp->sq.head == qp->sq.tail + 1 && + (qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)) + write_dwqe(hr_dev, qp, wqe); + else + update_sq_db(hr_dev, qp); } spin_unlock_irqrestore(&qp->sq.lock, flags); @@ -721,14 +765,74 @@ out: static int check_recv_valid(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { + struct ib_device *ibdev = &hr_dev->ib_dev; + struct ib_qp *ibqp = &hr_qp->ibqp; + + if (unlikely(ibqp->qp_type != IB_QPT_RC && + ibqp->qp_type != IB_QPT_GSI && + ibqp->qp_type != IB_QPT_UD)) { + ibdev_err(ibdev, "unsupported qp type, qp_type = %d.\n", + ibqp->qp_type); + return -EOPNOTSUPP; + } + if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) return -EIO; - else if (hr_qp->state == IB_QPS_RESET) + + if (hr_qp->state == IB_QPS_RESET) return -EINVAL; return 0; } +static void fill_recv_sge_to_wqe(const struct ib_recv_wr *wr, void *wqe, + u32 max_sge, bool rsv) +{ + struct hns_roce_v2_wqe_data_seg *dseg = wqe; + u32 i, cnt; + + for (i = 0, cnt = 0; i < wr->num_sge; i++) { + /* Skip zero-length sge */ + if (!wr->sg_list[i].length) + continue; + set_data_seg_v2(dseg + cnt, wr->sg_list + i); + cnt++; + } + + /* Fill a reserved sge to make hw 
stop reading remaining segments */ + if (rsv) { + dseg[cnt].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); + dseg[cnt].addr = 0; + dseg[cnt].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH); + } else { + /* Clear remaining segments to make ROCEE ignore sges */ + if (cnt < max_sge) + memset(dseg + cnt, 0, + (max_sge - cnt) * HNS_ROCE_SGE_SIZE); + } +} + +static void fill_rq_wqe(struct hns_roce_qp *hr_qp, const struct ib_recv_wr *wr, + u32 wqe_idx, u32 max_sge) +{ + struct hns_roce_rinl_sge *sge_list; + void *wqe = NULL; + u32 i; + + wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx); + fill_recv_sge_to_wqe(wr, wqe, max_sge, hr_qp->rq.rsv_sge); + + /* rq support inline data */ + if (hr_qp->rq_inl_buf.wqe_cnt) { + sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list; + hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt = (u32)wr->num_sge; + for (i = 0; i < wr->num_sge; i++) { + sge_list[i].addr = (void *)(u64)wr->sg_list[i].addr; + sge_list[i].len = wr->sg_list[i].length; + } + } +} + static int hns_roce_v2_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) @@ -736,14 +840,9 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_v2_wqe_data_seg *dseg; - struct hns_roce_rinl_sge *sge_list; + u32 wqe_idx, nreq, max_sge; unsigned long flags; - void *wqe = NULL; - u32 wqe_idx; - int nreq; int ret; - int i; spin_lock_irqsave(&hr_qp->rq.lock, flags); @@ -754,6 +853,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, goto out; } + max_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge; for (nreq = 0; wr; ++nreq, wr = wr->next) { if (unlikely(hns_roce_wq_overflow(&hr_qp->rq, nreq, hr_qp->ibqp.recv_cq))) { @@ -762,50 +862,22 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, goto out; } - wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); - - if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { + if (unlikely(wr->num_sge > max_sge)) { ibdev_err(ibdev, "num_sge = %d >= max_sge = %u.\n", - wr->num_sge, hr_qp->rq.max_gs); + wr->num_sge, max_sge); ret = -EINVAL; *bad_wr = wr; goto out; } - wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx); - dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; - for (i = 0; i < wr->num_sge; i++) { - if (!wr->sg_list[i].length) - continue; - set_data_seg_v2(dseg, wr->sg_list + i); - dseg++; - } - - if (wr->num_sge < hr_qp->rq.max_gs) { - dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); - dseg->addr = 0; - } - - /* rq support inline data */ - if (hr_qp->rq_inl_buf.wqe_cnt) { - sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list; - hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt = - (u32)wr->num_sge; - for (i = 0; i < wr->num_sge; i++) { - sge_list[i].addr = - (void *)(u64)wr->sg_list[i].addr; - sge_list[i].len = wr->sg_list[i].length; - } - } - + wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); + fill_rq_wqe(hr_qp, wr, wqe_idx, max_sge); hr_qp->rq.wrid[wqe_idx] = wr->wr_id; } out: if (likely(nreq)) { hr_qp->rq.head += nreq; - /* Memory barrier */ - wmb(); /* * Hip08 hardware cannot flush the WQEs in RQ if the QP state @@ -829,41 +901,82 @@ out: return ret; } -static void *get_srq_wqe(struct hns_roce_srq *srq, int n) +static void *get_srq_wqe_buf(struct hns_roce_srq *srq, u32 n) { return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift); } -static void *get_idx_buf(struct hns_roce_idx_que *idx_que, unsigned int n) +static void *get_idx_buf(struct hns_roce_idx_que *idx_que, u32 
n) { return hns_roce_buf_offset(idx_que->mtr.kmem, n << idx_que->entry_shift); } -static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index) +static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, u32 wqe_index) { /* always called with interrupts disabled. */ spin_lock(&srq->lock); bitmap_clear(srq->idx_que.bitmap, wqe_index, 1); - srq->tail++; + srq->idx_que.tail++; spin_unlock(&srq->lock); } -static int find_empty_entry(struct hns_roce_idx_que *idx_que, - unsigned long size) +static int hns_roce_srqwq_overflow(struct hns_roce_srq *srq) { - int wqe_idx; + struct hns_roce_idx_que *idx_que = &srq->idx_que; - if (unlikely(bitmap_full(idx_que->bitmap, size))) + return idx_que->head - idx_que->tail >= srq->wqe_cnt; +} + +static int check_post_srq_valid(struct hns_roce_srq *srq, u32 max_sge, + const struct ib_recv_wr *wr) +{ + struct ib_device *ib_dev = srq->ibsrq.device; + + if (unlikely(wr->num_sge > max_sge)) { + ibdev_err(ib_dev, + "failed to check sge, wr->num_sge = %d, max_sge = %u.\n", + wr->num_sge, max_sge); + return -EINVAL; + } + + if (unlikely(hns_roce_srqwq_overflow(srq))) { + ibdev_err(ib_dev, + "failed to check srqwq status, srqwq is full.\n"); + return -ENOMEM; + } + + return 0; +} + +static int get_srq_wqe_idx(struct hns_roce_srq *srq, u32 *wqe_idx) +{ + struct hns_roce_idx_que *idx_que = &srq->idx_que; + u32 pos; + + pos = find_first_zero_bit(idx_que->bitmap, srq->wqe_cnt); + if (unlikely(pos == srq->wqe_cnt)) return -ENOSPC; - wqe_idx = find_first_zero_bit(idx_que->bitmap, size); + bitmap_set(idx_que->bitmap, pos, 1); + *wqe_idx = pos; + return 0; +} - bitmap_set(idx_que->bitmap, wqe_idx, 1); +static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx) +{ + struct hns_roce_idx_que *idx_que = &srq->idx_que; + unsigned int head; + __le32 *buf; - return wqe_idx; + head = idx_que->head & (srq->wqe_cnt - 1); + + buf = get_idx_buf(idx_que, head); + *buf = cpu_to_le32(wqe_idx); + + idx_que->head++; } static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, @@ -872,77 +985,42 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, { struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); struct hns_roce_srq *srq = to_hr_srq(ibsrq); - struct hns_roce_v2_wqe_data_seg *dseg; struct hns_roce_v2_db srq_db; unsigned long flags; - unsigned int ind; - __le32 *srq_idx; int ret = 0; - int wqe_idx; + u32 max_sge; + u32 wqe_idx; void *wqe; - int nreq; - int i; + u32 nreq; spin_lock_irqsave(&srq->lock, flags); - ind = srq->head & (srq->wqe_cnt - 1); - + max_sge = srq->max_gs - srq->rsv_sge; for (nreq = 0; wr; ++nreq, wr = wr->next) { - if (unlikely(wr->num_sge >= srq->max_gs)) { - ret = -EINVAL; - *bad_wr = wr; - break; - } - - if (unlikely(srq->head == srq->tail)) { - ret = -ENOMEM; + ret = check_post_srq_valid(srq, max_sge, wr); + if (ret) { *bad_wr = wr; break; } - wqe_idx = find_empty_entry(&srq->idx_que, srq->wqe_cnt); - if (unlikely(wqe_idx < 0)) { - ret = -ENOMEM; + ret = get_srq_wqe_idx(srq, &wqe_idx); + if (unlikely(ret)) { *bad_wr = wr; break; } - wqe = get_srq_wqe(srq, wqe_idx); - dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; - - for (i = 0; i < wr->num_sge; ++i) { - dseg[i].len = cpu_to_le32(wr->sg_list[i].length); - dseg[i].lkey = cpu_to_le32(wr->sg_list[i].lkey); - dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr); - } - - if (wr->num_sge < srq->max_gs) { - dseg[i].len = 0; - dseg[i].lkey = cpu_to_le32(0x100); - dseg[i].addr = 0; - } - - srq_idx = get_idx_buf(&srq->idx_que, ind); - *srq_idx = cpu_to_le32(wqe_idx); - + wqe = 
get_srq_wqe_buf(srq, wqe_idx); + fill_recv_sge_to_wqe(wr, wqe, max_sge, srq->rsv_sge); + fill_wqe_idx(srq, wqe_idx); srq->wrid[wqe_idx] = wr->wr_id; - ind = (ind + 1) & (srq->wqe_cnt - 1); } if (likely(nreq)) { - srq->head += nreq; - - /* - * Make sure that descriptors are written before - * doorbell record. - */ - wmb(); - srq_db.byte_4 = cpu_to_le32(HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S | (srq->srqn & V2_DB_BYTE_4_TAG_M)); srq_db.parameter = - cpu_to_le32(srq->head & V2_DB_PARAMETER_IDX_M); + cpu_to_le32(srq->idx_que.head & V2_DB_PARAMETER_IDX_M); hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l); } @@ -1059,15 +1137,6 @@ static int hns_roce_v2_rst_process_cmd(struct hns_roce_dev *hr_dev) return 0; } -static int hns_roce_cmq_space(struct hns_roce_v2_cmq_ring *ring) -{ - int ntu = ring->next_to_use; - int ntc = ring->next_to_clean; - int used = (ntu - ntc + ring->desc_num) % ring->desc_num; - - return ring->desc_num - used - 1; -} - static int hns_roce_alloc_cmq_desc(struct hns_roce_dev *hr_dev, struct hns_roce_v2_cmq_ring *ring) { @@ -1107,8 +1176,7 @@ static int hns_roce_init_cmq_ring(struct hns_roce_dev *hr_dev, bool ring_type) &priv->cmq.csq : &priv->cmq.crq; ring->flag = ring_type; - ring->next_to_clean = 0; - ring->next_to_use = 0; + ring->head = 0; return hns_roce_alloc_cmq_desc(hr_dev, ring); } @@ -1207,34 +1275,10 @@ static void hns_roce_cmq_setup_basic_desc(struct hns_roce_cmq_desc *desc, static int hns_roce_cmq_csq_done(struct hns_roce_dev *hr_dev) { - u32 head = roce_read(hr_dev, ROCEE_TX_CMQ_HEAD_REG); + u32 tail = roce_read(hr_dev, ROCEE_TX_CMQ_TAIL_REG); struct hns_roce_v2_priv *priv = hr_dev->priv; - return head == priv->cmq.csq.next_to_use; -} - -static int hns_roce_cmq_csq_clean(struct hns_roce_dev *hr_dev) -{ - struct hns_roce_v2_priv *priv = hr_dev->priv; - struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; - struct hns_roce_cmq_desc *desc; - u16 ntc = csq->next_to_clean; - u32 head; - int clean = 0; - - desc = &csq->desc[ntc]; - head = roce_read(hr_dev, ROCEE_TX_CMQ_HEAD_REG); - while (head != ntc) { - memset(desc, 0, sizeof(*desc)); - ntc++; - if (ntc == csq->desc_num) - ntc = 0; - desc = &csq->desc[ntc]; - clean++; - } - csq->next_to_clean = ntc; - - return clean; + return tail == priv->cmq.csq.head; } static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, @@ -1242,42 +1286,26 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, { struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; - struct hns_roce_cmq_desc *desc_to_use; - bool complete = false; u32 timeout = 0; - int handle = 0; u16 desc_ret; - int ret = 0; - int ntc; + u32 tail; + int ret; + int i; spin_lock_bh(&csq->lock); - if (num > hns_roce_cmq_space(csq)) { - spin_unlock_bh(&csq->lock); - return -EBUSY; - } - - /* - * Record the location of desc in the cmq for this time - * which will be use for hardware to write back - */ - ntc = csq->next_to_use; + tail = csq->head; - while (handle < num) { - desc_to_use = &csq->desc[csq->next_to_use]; - *desc_to_use = desc[handle]; - dev_dbg(hr_dev->dev, "set cmq desc:\n"); - csq->next_to_use++; - if (csq->next_to_use == csq->desc_num) - csq->next_to_use = 0; - handle++; + for (i = 0; i < num; i++) { + csq->desc[csq->head++] = desc[i]; + if (csq->head == csq->desc_num) + csq->head = 0; } /* Write to hardware */ - roce_write(hr_dev, ROCEE_TX_CMQ_TAIL_REG, csq->next_to_use); + roce_write(hr_dev, ROCEE_TX_CMQ_HEAD_REG, csq->head); - /* - * If the command is sync, wait for the firmware to write back, + /* 
If the command is sync, wait for the firmware to write back, * if multi descriptors to be sent, use the first one to check */ if (le16_to_cpu(desc->flag) & HNS_ROCE_CMD_FLAG_NO_INTR) { @@ -1285,39 +1313,34 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, if (hns_roce_cmq_csq_done(hr_dev)) break; udelay(1); - timeout++; - } while (timeout < priv->cmq.tx_timeout); + } while (++timeout < priv->cmq.tx_timeout); } if (hns_roce_cmq_csq_done(hr_dev)) { - complete = true; - handle = 0; - while (handle < num) { - /* get the result of hardware write back */ - desc_to_use = &csq->desc[ntc]; - desc[handle] = *desc_to_use; - dev_dbg(hr_dev->dev, "Get cmq desc:\n"); - desc_ret = le16_to_cpu(desc[handle].retval); - if (desc_ret == CMD_EXEC_SUCCESS) - ret = 0; - else - ret = -EIO; - priv->cmq.last_status = desc_ret; - ntc++; - handle++; - if (ntc == csq->desc_num) - ntc = 0; + for (ret = 0, i = 0; i < num; i++) { + /* check the result of hardware write back */ + desc[i] = csq->desc[tail++]; + if (tail == csq->desc_num) + tail = 0; + + desc_ret = le16_to_cpu(desc[i].retval); + if (likely(desc_ret == CMD_EXEC_SUCCESS)) + continue; + + dev_err_ratelimited(hr_dev->dev, + "Cmdq IO error, opcode = %x, return = %x\n", + desc->opcode, desc_ret); + ret = -EIO; } - } + } else { + /* FW/HW reset or incorrect number of desc */ + tail = roce_read(hr_dev, ROCEE_TX_CMQ_TAIL_REG); + dev_warn(hr_dev->dev, "CMDQ move tail from %d to %d\n", + csq->head, tail); + csq->head = tail; - if (!complete) ret = -EAGAIN; - - /* clean the command send queue */ - handle = hns_roce_cmq_csq_clean(hr_dev); - if (handle != num) - dev_warn(hr_dev->dev, "Cleaned %d, need to clean %d\n", - handle, num); + } spin_unlock_bh(&csq->lock); @@ -1530,7 +1553,8 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev) CFG_GLOBAL_PARAM_DATA_0_ROCEE_TIME_1US_CFG_S, 0x3e8); roce_set_field(req->time_cfg_udp_port, CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_M, - CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_S, 0x12b7); + CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_S, + ROCE_V2_UDP_DPORT); return hns_roce_cmq_send(hr_dev, &desc, 1); } @@ -1541,17 +1565,13 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) struct hns_roce_pf_res_a *req_a; struct hns_roce_pf_res_b *req_b; int ret; - int i; - for (i = 0; i < 2; i++) { - hns_roce_cmq_setup_basic_desc(&desc[i], - HNS_ROCE_OPC_QUERY_PF_RES, true); + hns_roce_cmq_setup_basic_desc(&desc[0], HNS_ROCE_OPC_QUERY_PF_RES, + true); + desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - if (i == 0) - desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - else - desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - } + hns_roce_cmq_setup_basic_desc(&desc[1], HNS_ROCE_OPC_QUERY_PF_RES, + true); ret = hns_roce_cmq_send(hr_dev, desc, 2); if (ret) @@ -1644,19 +1664,16 @@ static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev) struct hns_roce_cmq_desc desc[2]; struct hns_roce_vf_res_a *req_a; struct hns_roce_vf_res_b *req_b; - int i; req_a = (struct hns_roce_vf_res_a *)desc[0].data; req_b = (struct hns_roce_vf_res_b *)desc[1].data; - for (i = 0; i < 2; i++) { - hns_roce_cmq_setup_basic_desc(&desc[i], - HNS_ROCE_OPC_ALLOC_VF_RES, false); - if (i == 0) - desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - else - desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - } + hns_roce_cmq_setup_basic_desc(&desc[0], HNS_ROCE_OPC_ALLOC_VF_RES, + false); + desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + + hns_roce_cmq_setup_basic_desc(&desc[1], HNS_ROCE_OPC_ALLOC_VF_RES, + false); 
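For reference, a minimal sketch of the head/tail flow that the reworked __hns_roce_cmq_send() above converges on: software keeps only a producer head, posts descriptors by writing the head register, and treats the batch as complete once the hardware-reported tail catches up; on timeout the head is resynced from the tail. The names below (struct cmd_ring, struct cmd_desc, hw_write_head(), hw_read_tail()) are invented for this illustration and are not part of the driver.

	/* illustrative only; assumes the usual kernel helpers (u32, udelay) */
	struct cmd_ring {
		struct cmd_desc *desc;	/* descriptor array of desc_num entries */
		u32 desc_num;
		u32 head;		/* software producer index */
	};

	static int ring_submit(struct cmd_ring *ring, const struct cmd_desc *req,
			       int num, unsigned int timeout_us)
	{
		unsigned int t;
		int i;

		/* copy the request descriptors in at the current head */
		for (i = 0; i < num; i++) {
			ring->desc[ring->head++] = req[i];
			if (ring->head == ring->desc_num)
				ring->head = 0;
		}

		/* doorbell: tell hardware how far the head has advanced */
		hw_write_head(ring->head);

		/* done once the hardware tail has caught up with the head */
		for (t = 0; t < timeout_us; t++) {
			if (hw_read_tail() == ring->head)
				return 0;
			udelay(1);
		}

		/* on timeout, resync the software head from the hardware tail */
		ring->head = hw_read_tail();
		return -EAGAIN;
	}
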
roce_set_field(req_a->vf_qpc_bt_idx_num, VF_RES_A_DATA_1_VF_QPC_BT_IDX_M, @@ -1866,7 +1883,6 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR | HNS_ROCE_CAP_FLAG_ROCE_V1_V2 | - HNS_ROCE_CAP_FLAG_RQ_INLINE | HNS_ROCE_CAP_FLAG_RECORD_DB | HNS_ROCE_CAP_FLAG_SQ_RECORD_DB; @@ -1999,10 +2015,12 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->max_sq_sg = le16_to_cpu(resp_a->max_sq_sg); caps->max_sq_inline = le16_to_cpu(resp_a->max_sq_inline); caps->max_rq_sg = le16_to_cpu(resp_a->max_rq_sg); + caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg); caps->max_extend_sg = le32_to_cpu(resp_a->max_extend_sg); caps->num_qpc_timer = le16_to_cpu(resp_a->num_qpc_timer); caps->num_cqc_timer = le16_to_cpu(resp_a->num_cqc_timer); caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges); + caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges); caps->num_aeq_vectors = resp_a->num_aeq_vectors; caps->num_other_vectors = resp_a->num_other_vectors; caps->max_sq_desc_sz = resp_a->max_sq_desc_sz; @@ -2336,7 +2354,6 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev, struct hns_roce_link_table_entry *entry; enum hns_roce_opcode_type opcode; u32 page_num; - int i; switch (type) { case TSQ_LINK_TABLE: @@ -2354,14 +2371,10 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev, page_num = link_tbl->npages; entry = link_tbl->table.buf; - for (i = 0; i < 2; i++) { - hns_roce_cmq_setup_basic_desc(&desc[i], opcode, false); + hns_roce_cmq_setup_basic_desc(&desc[0], opcode, false); + desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - if (i == 0) - desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - else - desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - } + hns_roce_cmq_setup_basic_desc(&desc[1], opcode, false); req_a->base_addr_l = cpu_to_le32(link_tbl->table.map & 0xffffffff); req_a->base_addr_h = cpu_to_le32(link_tbl->table.map >> 32); @@ -2880,36 +2893,20 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev, mpt_entry = mb_buf; memset(mpt_entry, 0, sizeof(*mpt_entry)); - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, - V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID); - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M, - V2_MPT_BYTE_4_PBL_HOP_NUM_S, mr->pbl_hop_num == - HNS_ROCE_HOP_NUM_0 ? 0 : mr->pbl_hop_num); - roce_set_field(mpt_entry->byte_4_pd_hop_st, - V2_MPT_BYTE_4_PBL_BA_PG_SZ_M, - V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, - to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, - V2_MPT_BYTE_4_PD_S, mr->pd); - - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 0); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 0); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S, - (mr->access & IB_ACCESS_MW_BIND ? 1 : 0)); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S, - mr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S, - (mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0)); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S, - (mr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0)); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S, - (mr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0)); - - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, - mr->type == MR_TYPE_MR ? 
0 : 1); - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_INNER_PA_VLD_S, - 1); + hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID); + hr_reg_write(mpt_entry, MPT_PD, mr->pd); + hr_reg_enable(mpt_entry, MPT_L_INV_EN); + + hr_reg_write(mpt_entry, MPT_BIND_EN, + !!(mr->access & IB_ACCESS_MW_BIND)); + hr_reg_write(mpt_entry, MPT_ATOMIC_EN, + !!(mr->access & IB_ACCESS_REMOTE_ATOMIC)); + hr_reg_write(mpt_entry, MPT_RR_EN, + !!(mr->access & IB_ACCESS_REMOTE_READ)); + hr_reg_write(mpt_entry, MPT_RW_EN, + !!(mr->access & IB_ACCESS_REMOTE_WRITE)); + hr_reg_write(mpt_entry, MPT_LW_EN, + !!((mr->access & IB_ACCESS_LOCAL_WRITE))); mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size)); mpt_entry->len_h = cpu_to_le32(upper_32_bits(mr->size)); @@ -2917,9 +2914,19 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev, mpt_entry->va_l = cpu_to_le32(lower_32_bits(mr->iova)); mpt_entry->va_h = cpu_to_le32(upper_32_bits(mr->iova)); + if (mr->type != MR_TYPE_MR) + hr_reg_enable(mpt_entry, MPT_PA); + if (mr->type == MR_TYPE_DMA) return 0; + if (mr->pbl_hop_num != HNS_ROCE_HOP_NUM_0) + hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, mr->pbl_hop_num); + + hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ, + to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); + hr_reg_enable(mpt_entry, MPT_INNER_PA_VLD); + ret = set_mtpt_pbl(hr_dev, mpt_entry, mr); return ret; @@ -2927,20 +2934,17 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev, static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, int flags, - u32 pdn, int mr_access_flags, u64 iova, - u64 size, void *mb_buf) + void *mb_buf) { struct hns_roce_v2_mpt_entry *mpt_entry = mb_buf; + u32 mr_access_flags = mr->access; int ret = 0; roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID); - if (flags & IB_MR_REREG_PD) { - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, - V2_MPT_BYTE_4_PD_S, pdn); - mr->pd = pdn; - } + roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, + V2_MPT_BYTE_4_PD_S, mr->pd); if (flags & IB_MR_REREG_ACCESS) { roce_set_bit(mpt_entry->byte_8_mw_cnt_en, @@ -2958,13 +2962,10 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, } if (flags & IB_MR_REREG_TRANS) { - mpt_entry->va_l = cpu_to_le32(lower_32_bits(iova)); - mpt_entry->va_h = cpu_to_le32(upper_32_bits(iova)); - mpt_entry->len_l = cpu_to_le32(lower_32_bits(size)); - mpt_entry->len_h = cpu_to_le32(upper_32_bits(size)); - - mr->iova = iova; - mr->size = size; + mpt_entry->va_l = cpu_to_le32(lower_32_bits(mr->iova)); + mpt_entry->va_h = cpu_to_le32(upper_32_bits(mr->iova)); + mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size)); + mpt_entry->len_h = cpu_to_le32(upper_32_bits(mr->size)); ret = set_mtpt_pbl(hr_dev, mpt_entry, mr); } @@ -3126,11 +3127,6 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, if (nfreed) { hr_cq->cons_index += nfreed; - /* - * Make sure update of buffer contents is done before - * updating consumer index. 
- */ - wmb(); hns_roce_v2_cq_set_ci(hr_cq, hr_cq->cons_index); } } @@ -3639,11 +3635,8 @@ static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries, break; } - if (npolled) { - /* Memory barrier */ - wmb(); + if (npolled) hns_roce_v2_cq_set_ci(hr_cq, hr_cq->cons_index); - } out: spin_unlock_irqrestore(&hr_cq->lock, flags); @@ -4235,7 +4228,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { - const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct ib_device *ibdev = &hr_dev->ib_dev; @@ -4243,7 +4235,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, dma_addr_t irrl_ba; enum ib_mtu mtu; u8 lp_pktn_ini; - u8 port_num; u64 *mtts; u8 *dmac; u8 *smac; @@ -4324,15 +4315,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0); } - /* Configure GID index */ - port_num = rdma_ah_get_port_num(&attr->ah_attr); - roce_set_field(context->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, - hns_get_gid_index(hr_dev, port_num - 1, - grh->sgid_index)); - roce_set_field(qpc_mask->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, 0); - memcpy(&(context->dmac), dmac, sizeof(u32)); roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_DMAC_M, V2_QPC_BYTE_52_DMAC_S, *((u16 *)(&dmac[4]))); @@ -5083,7 +5065,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, done: qp_attr->cur_qp_state = qp_attr->qp_state; qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt; - qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs; + qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge; if (!ibqp->uobject) { qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt; @@ -5174,6 +5156,9 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, struct hns_roce_cmq_desc desc; int ret, i; + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return 0; + mutex_lock(&hr_dev->qp_table.scc_mutex); /* set scc ctx clear done flag */ @@ -5220,98 +5205,96 @@ out: return ret; } -static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, - struct hns_roce_srq *srq, u32 pdn, u16 xrcd, - u32 cqn, void *mb_buf, u64 *mtts_wqe, - u64 *mtts_idx, dma_addr_t dma_handle_wqe, - dma_addr_t dma_handle_idx) +#define DMA_IDX_SHIFT 3 +#define DMA_WQE_SHIFT 3 + +static int hns_roce_v2_write_srqc_index_queue(struct hns_roce_srq *srq, + struct hns_roce_srq_context *ctx) { - struct hns_roce_srq_context *srq_context; + struct hns_roce_idx_que *idx_que = &srq->idx_que; + struct ib_device *ibdev = srq->ibsrq.device; + struct hns_roce_dev *hr_dev = to_hr_dev(ibdev); + u64 mtts_idx[MTT_MIN_COUNT] = {}; + dma_addr_t dma_handle_idx = 0; + int ret; + + /* Get physical address of idx que buf */ + ret = hns_roce_mtr_find(hr_dev, &idx_que->mtr, 0, mtts_idx, + ARRAY_SIZE(mtts_idx), &dma_handle_idx); + if (ret < 1) { + ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n", + ret); + return -ENOBUFS; + } + + hr_reg_write(ctx, SRQC_IDX_HOP_NUM, + to_hr_hem_hopnum(hr_dev->caps.idx_hop_num, srq->wqe_cnt)); + + hr_reg_write(ctx, SRQC_IDX_BT_BA_L, dma_handle_idx >> DMA_IDX_SHIFT); + hr_reg_write(ctx, SRQC_IDX_BT_BA_H, + upper_32_bits(dma_handle_idx >> DMA_IDX_SHIFT)); + + hr_reg_write(ctx, SRQC_IDX_BA_PG_SZ, + to_hr_hw_page_shift(idx_que->mtr.hem_cfg.ba_pg_shift)); + hr_reg_write(ctx, 
SRQC_IDX_BUF_PG_SZ, + to_hr_hw_page_shift(idx_que->mtr.hem_cfg.buf_pg_shift)); + + hr_reg_write(ctx, SRQC_IDX_CUR_BLK_ADDR_L, + to_hr_hw_page_addr(mtts_idx[0])); + hr_reg_write(ctx, SRQC_IDX_CUR_BLK_ADDR_H, + upper_32_bits(to_hr_hw_page_addr(mtts_idx[0]))); + + hr_reg_write(ctx, SRQC_IDX_NXT_BLK_ADDR_L, + to_hr_hw_page_addr(mtts_idx[1])); + hr_reg_write(ctx, SRQC_IDX_NXT_BLK_ADDR_H, + upper_32_bits(to_hr_hw_page_addr(mtts_idx[1]))); + + return 0; +} - srq_context = mb_buf; - memset(srq_context, 0, sizeof(*srq_context)); - - roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M, - SRQC_BYTE_4_SRQ_ST_S, 1); - - roce_set_field(srq_context->byte_4_srqn_srqst, - SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M, - SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S, - to_hr_hem_hopnum(hr_dev->caps.srqwqe_hop_num, - srq->wqe_cnt)); - roce_set_field(srq_context->byte_4_srqn_srqst, - SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S, - ilog2(srq->wqe_cnt)); - - roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M, - SRQC_BYTE_4_SRQN_S, srq->srqn); - - roce_set_field(srq_context->byte_8_limit_wl, SRQC_BYTE_8_SRQ_LIMIT_WL_M, - SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0); - - roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M, - SRQC_BYTE_12_SRQ_XRCD_S, xrcd); - - srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3)); - - roce_set_field(srq_context->byte_24_wqe_bt_ba, - SRQC_BYTE_24_SRQ_WQE_BT_BA_M, - SRQC_BYTE_24_SRQ_WQE_BT_BA_S, - dma_handle_wqe >> 35); - - roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M, - SRQC_BYTE_28_PD_S, pdn); - roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M, - SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 : - fls(srq->max_gs - 1)); - - srq_context->idx_bt_ba = cpu_to_le32(dma_handle_idx >> 3); - roce_set_field(srq_context->rsv_idx_bt_ba, - SRQC_BYTE_36_SRQ_IDX_BT_BA_M, - SRQC_BYTE_36_SRQ_IDX_BT_BA_S, - dma_handle_idx >> 35); - - srq_context->idx_cur_blk_addr = - cpu_to_le32(to_hr_hw_page_addr(mtts_idx[0])); - roce_set_field(srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M, - SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S, - upper_32_bits(to_hr_hw_page_addr(mtts_idx[0]))); - roce_set_field(srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M, - SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S, - to_hr_hem_hopnum(hr_dev->caps.idx_hop_num, - srq->wqe_cnt)); - - roce_set_field(srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M, - SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S, - to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.ba_pg_shift)); - roce_set_field(srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M, - SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S, - to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.buf_pg_shift)); - - srq_context->idx_nxt_blk_addr = - cpu_to_le32(to_hr_hw_page_addr(mtts_idx[1])); - roce_set_field(srq_context->rsv_idxnxtblkaddr, - SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M, - SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S, - upper_32_bits(to_hr_hw_page_addr(mtts_idx[1]))); - roce_set_field(srq_context->byte_56_xrc_cqn, - SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S, - cqn); - roce_set_field(srq_context->byte_56_xrc_cqn, - SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M, - SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S, - to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift)); - roce_set_field(srq_context->byte_56_xrc_cqn, - SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M, - SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S, - to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift)); - - roce_set_bit(srq_context->db_record_addr_record_en, - SRQC_BYTE_60_SRQ_RECORD_EN_S, 0); 
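For reference, a simplified model of the FIELD_LOC()/hr_reg_write() style used by the new SRQC code above: the context is treated as an array of little-endian 32-bit words, and each field is named by its absolute high/low bit positions. The helper below is invented for illustration (it assumes the field fits inside one 32-bit word; the driver's real helpers also cover fields that straddle word boundaries) and relies only on standard kernel helpers (GENMASK, le32 accessors).

	#define CTX_WORD(bit)	((bit) / 32)
	#define CTX_SHIFT(bit)	((bit) % 32)

	/* write 'val' into bits lo..hi of a little-endian context image */
	static void ctx_write_field(__le32 *ctx, unsigned int hi, unsigned int lo,
				    u32 val)
	{
		u32 mask = GENMASK(CTX_SHIFT(hi), CTX_SHIFT(lo));
		u32 word = le32_to_cpu(ctx[CTX_WORD(lo)]);

		word &= ~mask;
		word |= (val << CTX_SHIFT(lo)) & mask;
		ctx[CTX_WORD(lo)] = cpu_to_le32(word);
	}

	/* e.g. SRQC_SHIFT is FIELD_LOC(7, 4): bits 4..7 of word 0 hold ilog2(wqe_cnt) */
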
+static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf) +{ + struct ib_device *ibdev = srq->ibsrq.device; + struct hns_roce_dev *hr_dev = to_hr_dev(ibdev); + struct hns_roce_srq_context *ctx = mb_buf; + u64 mtts_wqe[MTT_MIN_COUNT] = {}; + dma_addr_t dma_handle_wqe = 0; + int ret; + + memset(ctx, 0, sizeof(*ctx)); + + /* Get the physical address of srq buf */ + ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe, + ARRAY_SIZE(mtts_wqe), &dma_handle_wqe); + if (ret < 1) { + ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n", + ret); + return -ENOBUFS; + } + + hr_reg_write(ctx, SRQC_SRQ_ST, 1); + hr_reg_write(ctx, SRQC_PD, to_hr_pd(srq->ibsrq.pd)->pdn); + hr_reg_write(ctx, SRQC_SRQN, srq->srqn); + hr_reg_write(ctx, SRQC_XRCD, 0); + hr_reg_write(ctx, SRQC_XRC_CQN, srq->cqn); + hr_reg_write(ctx, SRQC_SHIFT, ilog2(srq->wqe_cnt)); + hr_reg_write(ctx, SRQC_RQWS, + srq->max_gs <= 0 ? 0 : fls(srq->max_gs - 1)); + + hr_reg_write(ctx, SRQC_WQE_HOP_NUM, + to_hr_hem_hopnum(hr_dev->caps.srqwqe_hop_num, + srq->wqe_cnt)); + + hr_reg_write(ctx, SRQC_WQE_BT_BA_L, dma_handle_wqe >> DMA_WQE_SHIFT); + hr_reg_write(ctx, SRQC_WQE_BT_BA_H, + upper_32_bits(dma_handle_wqe >> DMA_WQE_SHIFT)); + + hr_reg_write(ctx, SRQC_WQE_BA_PG_SZ, + to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift)); + hr_reg_write(ctx, SRQC_WQE_BUF_PG_SZ, + to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift)); + + return hns_roce_v2_write_srqc_index_queue(srq, ctx); } static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, @@ -5331,7 +5314,7 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, return -EINVAL; if (srq_attr_mask & IB_SRQ_LIMIT) { - if (srq_attr->srq_limit >= srq->wqe_cnt) + if (srq_attr->srq_limit > srq->wqe_cnt) return -EINVAL; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); @@ -5394,8 +5377,8 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) SRQC_BYTE_8_SRQ_LIMIT_WL_S); attr->srq_limit = limit_wl; - attr->max_wr = srq->wqe_cnt - 1; - attr->max_sge = srq->max_gs; + attr->max_wr = srq->wqe_cnt; + attr->max_sge = srq->max_gs - srq->rsv_sge; out: hns_roce_free_cmd_mailbox(hr_dev, mailbox); @@ -5626,9 +5609,6 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, ++eq->cons_index; aeqe_found = 1; - if (eq->cons_index > (2 * eq->entries - 1)) - eq->cons_index = 0; - hns_roce_v2_init_irq_work(hr_dev, eq, queue_num); aeqe = next_aeqe_sw_v2(eq); @@ -5671,9 +5651,6 @@ static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev, ++eq->cons_index; ceqe_found = 1; - if (eq->cons_index > (EQ_DEPTH_COEFF * eq->entries - 1)) - eq->cons_index = 0; - ceqe = next_ceqe_sw_v2(eq); } @@ -5948,7 +5925,6 @@ static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) buf_attr.region[0].size = eq->entries * eq->eqe_size; buf_attr.region[0].hopnum = eq->hop_num; buf_attr.region_count = 1; - buf_attr.fixed_page = true; err = hns_roce_mtr_create(hr_dev, &eq->mtr, &buf_attr, hr_dev->caps.eqe_ba_pg_sz + @@ -6286,6 +6262,7 @@ static void hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, /* Get info from NIC driver. 
*/ hr_dev->reg_base = handle->rinfo.roce_io_base; + hr_dev->mem_base = handle->rinfo.roce_mem_base; hr_dev->caps.num_ports = 1; hr_dev->iboe.netdevs[0] = handle->rinfo.netdev; hr_dev->iboe.phy_port[0] = 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index bdaccf86460d..39621fb6ec16 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -96,7 +96,8 @@ #define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2 -#define HNS_ROCE_INVALID_LKEY 0x100 +#define HNS_ROCE_INVALID_LKEY 0x0 +#define HNS_ROCE_INVALID_SGE_LENGTH 0x80000000 #define HNS_ROCE_CMQ_TX_TIMEOUT 30000 #define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2 #define HNS_ROCE_V2_RSV_QPS 8 @@ -366,24 +367,61 @@ struct hns_roce_v2_cq_context { #define CQC_STASH CQC_FIELD_LOC(63, 63) struct hns_roce_srq_context { - __le32 byte_4_srqn_srqst; - __le32 byte_8_limit_wl; - __le32 byte_12_xrcd; - __le32 byte_16_pi_ci; - __le32 wqe_bt_ba; - __le32 byte_24_wqe_bt_ba; - __le32 byte_28_rqws_pd; - __le32 idx_bt_ba; - __le32 rsv_idx_bt_ba; - __le32 idx_cur_blk_addr; - __le32 byte_44_idxbufpgsz_addr; - __le32 idx_nxt_blk_addr; - __le32 rsv_idxnxtblkaddr; - __le32 byte_56_xrc_cqn; - __le32 db_record_addr_record_en; - __le32 db_record_addr; + __le32 byte_4_srqn_srqst; + __le32 byte_8_limit_wl; + __le32 byte_12_xrcd; + __le32 byte_16_pi_ci; + __le32 wqe_bt_ba; + __le32 byte_24_wqe_bt_ba; + __le32 byte_28_rqws_pd; + __le32 idx_bt_ba; + __le32 rsv_idx_bt_ba; + __le32 idx_cur_blk_addr; + __le32 byte_44_idxbufpgsz_addr; + __le32 idx_nxt_blk_addr; + __le32 rsv_idxnxtblkaddr; + __le32 byte_56_xrc_cqn; + __le32 db_record_addr_record_en; + __le32 db_record_addr; }; +#define SRQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_srq_context, h, l) + +#define SRQC_SRQ_ST SRQC_FIELD_LOC(1, 0) +#define SRQC_WQE_HOP_NUM SRQC_FIELD_LOC(3, 2) +#define SRQC_SHIFT SRQC_FIELD_LOC(7, 4) +#define SRQC_SRQN SRQC_FIELD_LOC(31, 8) +#define SRQC_LIMIT_WL SRQC_FIELD_LOC(47, 32) +#define SRQC_RSV0 SRQC_FIELD_LOC(63, 48) +#define SRQC_XRCD SRQC_FIELD_LOC(87, 64) +#define SRQC_RSV1 SRQC_FIELD_LOC(95, 88) +#define SRQC_PRODUCER_IDX SRQC_FIELD_LOC(111, 96) +#define SRQC_CONSUMER_IDX SRQC_FIELD_LOC(127, 112) +#define SRQC_WQE_BT_BA_L SRQC_FIELD_LOC(159, 128) +#define SRQC_WQE_BT_BA_H SRQC_FIELD_LOC(188, 160) +#define SRQC_RSV2 SRQC_FIELD_LOC(191, 189) +#define SRQC_PD SRQC_FIELD_LOC(215, 192) +#define SRQC_RQWS SRQC_FIELD_LOC(219, 216) +#define SRQC_RSV3 SRQC_FIELD_LOC(223, 220) +#define SRQC_IDX_BT_BA_L SRQC_FIELD_LOC(255, 224) +#define SRQC_IDX_BT_BA_H SRQC_FIELD_LOC(284, 256) +#define SRQC_RSV4 SRQC_FIELD_LOC(287, 285) +#define SRQC_IDX_CUR_BLK_ADDR_L SRQC_FIELD_LOC(319, 288) +#define SRQC_IDX_CUR_BLK_ADDR_H SRQC_FIELD_LOC(339, 320) +#define SRQC_RSV5 SRQC_FIELD_LOC(341, 340) +#define SRQC_IDX_HOP_NUM SRQC_FIELD_LOC(343, 342) +#define SRQC_IDX_BA_PG_SZ SRQC_FIELD_LOC(347, 344) +#define SRQC_IDX_BUF_PG_SZ SRQC_FIELD_LOC(351, 348) +#define SRQC_IDX_NXT_BLK_ADDR_L SRQC_FIELD_LOC(383, 352) +#define SRQC_IDX_NXT_BLK_ADDR_H SRQC_FIELD_LOC(403, 384) +#define SRQC_RSV6 SRQC_FIELD_LOC(415, 404) +#define SRQC_XRC_CQN SRQC_FIELD_LOC(439, 416) +#define SRQC_WQE_BA_PG_SZ SRQC_FIELD_LOC(443, 440) +#define SRQC_WQE_BUF_PG_SZ SRQC_FIELD_LOC(447, 444) +#define SRQC_DB_RECORD_EN SRQC_FIELD_LOC(448, 448) +#define SRQC_DB_RECORD_ADDR_L SRQC_FIELD_LOC(479, 449) +#define SRQC_DB_RECORD_ADDR_H SRQC_FIELD_LOC(511, 480) + #define 
SRQC_BYTE_4_SRQ_ST_S 0 #define SRQC_BYTE_4_SRQ_ST_M GENMASK(1, 0) @@ -993,6 +1031,45 @@ struct hns_roce_v2_mpt_entry { __le32 byte_64_buf_pa1; }; +#define MPT_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_mpt_entry, h, l) + +#define MPT_ST MPT_FIELD_LOC(1, 0) +#define MPT_PBL_HOP_NUM MPT_FIELD_LOC(3, 2) +#define MPT_PBL_BA_PG_SZ MPT_FIELD_LOC(7, 4) +#define MPT_PD MPT_FIELD_LOC(31, 8) +#define MPT_RA_EN MPT_FIELD_LOC(32, 32) +#define MPT_R_INV_EN MPT_FIELD_LOC(33, 33) +#define MPT_L_INV_EN MPT_FIELD_LOC(34, 34) +#define MPT_BIND_EN MPT_FIELD_LOC(35, 35) +#define MPT_ATOMIC_EN MPT_FIELD_LOC(36, 36) +#define MPT_RR_EN MPT_FIELD_LOC(37, 37) +#define MPT_RW_EN MPT_FIELD_LOC(38, 38) +#define MPT_LW_EN MPT_FIELD_LOC(39, 39) +#define MPT_MW_CNT MPT_FIELD_LOC(63, 40) +#define MPT_FRE MPT_FIELD_LOC(64, 64) +#define MPT_PA MPT_FIELD_LOC(65, 65) +#define MPT_ZBVA MPT_FIELD_LOC(66, 66) +#define MPT_SHARE MPT_FIELD_LOC(67, 67) +#define MPT_MR_MW MPT_FIELD_LOC(68, 68) +#define MPT_BPD MPT_FIELD_LOC(69, 69) +#define MPT_BQP MPT_FIELD_LOC(70, 70) +#define MPT_INNER_PA_VLD MPT_FIELD_LOC(71, 71) +#define MPT_MW_BIND_QPN MPT_FIELD_LOC(95, 72) +#define MPT_BOUND_LKEY MPT_FIELD_LOC(127, 96) +#define MPT_LEN MPT_FIELD_LOC(191, 128) +#define MPT_LKEY MPT_FIELD_LOC(223, 192) +#define MPT_VA MPT_FIELD_LOC(287, 224) +#define MPT_PBL_SIZE MPT_FIELD_LOC(319, 288) +#define MPT_PBL_BA MPT_FIELD_LOC(380, 320) +#define MPT_BLK_MODE MPT_FIELD_LOC(381, 381) +#define MPT_RSV0 MPT_FIELD_LOC(383, 382) +#define MPT_PA0 MPT_FIELD_LOC(441, 384) +#define MPT_BOUND_VA MPT_FIELD_LOC(447, 442) +#define MPT_PA1 MPT_FIELD_LOC(505, 448) +#define MPT_PERSIST_EN MPT_FIELD_LOC(506, 506) +#define MPT_RSV2 MPT_FIELD_LOC(507, 507) +#define MPT_PBL_BUF_PG_SZ MPT_FIELD_LOC(511, 508) + #define V2_MPT_BYTE_4_MPT_ST_S 0 #define V2_MPT_BYTE_4_MPT_ST_M GENMASK(1, 0) @@ -1059,6 +1136,8 @@ struct hns_roce_v2_mpt_entry { #define V2_DB_BYTE_4_CMD_S 24 #define V2_DB_BYTE_4_CMD_M GENMASK(27, 24) +#define V2_DB_FLAG_S 31 + #define V2_DB_PARAMETER_IDX_S 0 #define V2_DB_PARAMETER_IDX_M GENMASK(15, 0) @@ -1155,6 +1234,15 @@ struct hns_roce_v2_rc_send_wqe { #define V2_RC_SEND_WQE_BYTE_4_OPCODE_S 0 #define V2_RC_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0) +#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S 5 +#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M GENMASK(6, 5) + +#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S 13 +#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M GENMASK(14, 13) + +#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S 15 +#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M GENMASK(30, 15) + #define V2_RC_SEND_WQE_BYTE_4_OWNER_S 7 #define V2_RC_SEND_WQE_BYTE_4_CQE_S 8 @@ -1167,15 +1255,17 @@ struct hns_roce_v2_rc_send_wqe { #define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12 -#define V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S 19 +#define V2_RC_FRMR_WQE_BYTE_40_BIND_EN_S 10 + +#define V2_RC_FRMR_WQE_BYTE_40_ATOMIC_S 11 -#define V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S 20 +#define V2_RC_FRMR_WQE_BYTE_40_RR_S 12 -#define V2_RC_FRMR_WQE_BYTE_4_RR_S 21 +#define V2_RC_FRMR_WQE_BYTE_40_RW_S 13 -#define V2_RC_FRMR_WQE_BYTE_4_RW_S 22 +#define V2_RC_FRMR_WQE_BYTE_40_LW_S 14 -#define V2_RC_FRMR_WQE_BYTE_4_LW_S 23 +#define V2_RC_SEND_WQE_BYTE_4_FLAG_S 31 #define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0 #define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0) @@ -1190,7 +1280,7 @@ struct hns_roce_v2_rc_send_wqe { struct hns_roce_wqe_frmr_seg { __le32 pbl_size; - __le32 mode_buf_pg_sz; + __le32 byte_40; }; #define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S 4 @@ -1786,12 +1876,8 @@ struct hns_roce_v2_cmq_ring { dma_addr_t desc_dma_addr; struct 
hns_roce_cmq_desc *desc; u32 head; - u32 tail; - u16 buf_size; u16 desc_num; - int next_to_use; - int next_to_clean; u8 flag; spinlock_t lock; /* command queue lock */ }; @@ -1800,7 +1886,6 @@ struct hns_roce_v2_cmq { struct hns_roce_v2_cmq_ring csq; struct hns_roce_v2_cmq_ring crq; u16 tx_timeout; - u16 last_status; }; enum hns_roce_link_table_type { diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index d9179bae4989..c9c0836394a2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -42,22 +42,6 @@ #include "hns_roce_device.h" #include "hns_roce_hem.h" -/** - * hns_get_gid_index - Get gid index. - * @hr_dev: pointer to structure hns_roce_dev. - * @port: port, value range: 0 ~ MAX - * @gid_index: gid_index, value range: 0 ~ MAX - * Description: - * N ports shared gids, allocation method as follow: - * GID[0][0], GID[1][0],.....GID[N - 1][0], - * GID[0][0], GID[1][0],.....GID[N - 1][0], - * And so on - */ -u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) -{ - return gid_index * hr_dev->caps.num_ports + port; -} - static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) { u8 phy_port; @@ -217,7 +201,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev, props->max_srq_sge = hr_dev->caps.max_srq_sges; } - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR && + hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; props->max_fast_reg_page_list_len = HNS_ROCE_FRMR_MAX_PA; } @@ -748,11 +733,7 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) goto err_pd_table_free; } - ret = hns_roce_init_cq_table(hr_dev); - if (ret) { - dev_err(dev, "Failed to init completion queue table.\n"); - goto err_mr_table_free; - } + hns_roce_init_cq_table(hr_dev); ret = hns_roce_init_qp_table(hr_dev); if (ret) { @@ -772,13 +753,10 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) return 0; err_qp_table_free: - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) - hns_roce_cleanup_qp_table(hr_dev); + hns_roce_cleanup_qp_table(hr_dev); err_cq_table_free: hns_roce_cleanup_cq_table(hr_dev); - -err_mr_table_free: hns_roce_cleanup_mr_table(hr_dev); err_pd_table_free: diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 1bcffd93ff3e..79b3c3023fe7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -66,8 +66,7 @@ int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev, HNS_ROCE_CMD_TIMEOUT_MSECS); } -static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, - u32 pd, u64 iova, u64 size, u32 access) +static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { struct ib_device *ibdev = &hr_dev->ib_dev; unsigned long obj = 0; @@ -82,11 +81,6 @@ static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, return -ENOMEM; } - mr->iova = iova; /* MR va starting addr */ - mr->size = size; /* MR addr range */ - mr->pd = pd; /* MR num */ - mr->access = access; /* MR access permit */ - mr->enabled = 0; /* MR active status */ mr->key = hw_index_to_key(obj); /* MR key */ err = hns_roce_table_get(hr_dev, &hr_dev->mr_table.mtpt_table, obj); @@ -110,8 +104,7 @@ static void free_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) } static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, 
- size_t length, struct ib_udata *udata, u64 start, - int access) + struct ib_udata *udata, u64 start) { struct ib_device *ibdev = &hr_dev->ib_dev; bool is_fast = mr->type == MR_TYPE_FRMR; @@ -121,11 +114,10 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num; buf_attr.page_shift = is_fast ? PAGE_SHIFT : hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT; - buf_attr.region[0].size = length; + buf_attr.region[0].size = mr->size; buf_attr.region[0].hopnum = mr->pbl_hop_num; buf_attr.region_count = 1; - buf_attr.fixed_page = true; - buf_attr.user_access = access; + buf_attr.user_access = mr->access; /* fast MR's buffer is alloced before mapping, not at creation */ buf_attr.mtt_only = is_fast; @@ -197,9 +189,6 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, } mr->enabled = 1; - hns_roce_free_cmd_mailbox(hr_dev, mailbox); - - return 0; err_page: hns_roce_free_cmd_mailbox(hr_dev, mailbox); @@ -237,14 +226,16 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc) return ERR_PTR(-ENOMEM); mr->type = MR_TYPE_DMA; + mr->pd = to_hr_pd(pd)->pdn; + mr->access = acc; /* Allocate memory region key */ hns_roce_hem_list_init(&mr->pbl_mtr.hem_list); - ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, 0, acc); + ret = alloc_mr_key(hr_dev, mr); if (ret) goto err_free; - ret = hns_roce_mr_enable(to_hr_dev(pd->device), mr); + ret = hns_roce_mr_enable(hr_dev, mr); if (ret) goto err_mr; @@ -271,13 +262,17 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); + mr->iova = virt_addr; + mr->size = length; + mr->pd = to_hr_pd(pd)->pdn; + mr->access = access_flags; mr->type = MR_TYPE_MR; - ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, virt_addr, length, - access_flags); + + ret = alloc_mr_key(hr_dev, mr); if (ret) goto err_alloc_mr; - ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, access_flags); + ret = alloc_mr_pbl(hr_dev, mr, udata, start); if (ret) goto err_alloc_key; @@ -299,35 +294,6 @@ err_alloc_mr: return ERR_PTR(ret); } -static int rereg_mr_trans(struct ib_mr *ibmr, int flags, - u64 start, u64 length, - u64 virt_addr, int mr_access_flags, - struct hns_roce_cmd_mailbox *mailbox, - u32 pdn, struct ib_udata *udata) -{ - struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); - struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_mr *mr = to_hr_mr(ibmr); - int ret; - - free_mr_pbl(hr_dev, mr); - ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, mr_access_flags); - if (ret) { - ibdev_err(ibdev, "failed to create mr PBL, ret = %d.\n", ret); - return ret; - } - - ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn, - mr_access_flags, virt_addr, - length, mailbox->buf); - if (ret) { - ibdev_err(ibdev, "failed to write mtpt, ret = %d.\n", ret); - free_mr_pbl(hr_dev, mr); - } - - return ret; -} - struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, @@ -338,7 +304,6 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, struct hns_roce_mr *mr = to_hr_mr(ibmr); struct hns_roce_cmd_mailbox *mailbox; unsigned long mtpt_idx; - u32 pdn = 0; int ret; if (!mr->enabled) @@ -360,23 +325,29 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, ibdev_warn(ib_dev, "failed to destroy MPT, ret = %d.\n", ret); mr->enabled = 0; + mr->iova = virt_addr; + mr->size = length; if (flags & IB_MR_REREG_PD) - pdn = 
to_hr_pd(pd)->pdn; + mr->pd = to_hr_pd(pd)->pdn; + + if (flags & IB_MR_REREG_ACCESS) + mr->access = mr_access_flags; if (flags & IB_MR_REREG_TRANS) { - ret = rereg_mr_trans(ibmr, flags, - start, length, - virt_addr, mr_access_flags, - mailbox, pdn, udata); - if (ret) - goto free_cmd_mbox; - } else { - ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn, - mr_access_flags, virt_addr, - length, mailbox->buf); - if (ret) + free_mr_pbl(hr_dev, mr); + ret = alloc_mr_pbl(hr_dev, mr, udata, start); + if (ret) { + ibdev_err(ib_dev, "failed to alloc mr PBL, ret = %d.\n", + ret); goto free_cmd_mbox; + } + } + + ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, mailbox->buf); + if (ret) { + ibdev_err(ib_dev, "failed to write mtpt, ret = %d.\n", ret); + goto free_cmd_mbox; } ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx); @@ -386,12 +357,6 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, } mr->enabled = 1; - if (flags & IB_MR_REREG_ACCESS) - mr->access = mr_access_flags; - - hns_roce_free_cmd_mailbox(hr_dev, mailbox); - - return NULL; free_cmd_mbox: hns_roce_free_cmd_mailbox(hr_dev, mailbox); @@ -421,7 +386,6 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct device *dev = hr_dev->dev; struct hns_roce_mr *mr; - u64 length; int ret; if (mr_type != IB_MR_TYPE_MEM_REG) @@ -438,14 +402,15 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, return ERR_PTR(-ENOMEM); mr->type = MR_TYPE_FRMR; + mr->pd = to_hr_pd(pd)->pdn; + mr->size = max_num_sg * (1 << PAGE_SHIFT); /* Allocate memory region key */ - length = max_num_sg * (1 << PAGE_SHIFT); - ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, length, 0); + ret = alloc_mr_key(hr_dev, mr); if (ret) goto err_free; - ret = alloc_mr_pbl(hr_dev, mr, length, NULL, 0, 0); + ret = alloc_mr_pbl(hr_dev, mr, NULL, 0); if (ret) goto err_key; @@ -454,7 +419,7 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, goto err_pbl; mr->ibmr.rkey = mr->ibmr.lkey = mr->key; - mr->ibmr.length = length; + mr->ibmr.length = mr->size; return &mr->ibmr; @@ -631,30 +596,26 @@ int hns_roce_dealloc_mw(struct ib_mw *ibmw) } static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t *pages, struct hns_roce_buf_region *region) + struct hns_roce_buf_region *region, dma_addr_t *pages, + int max_count) { + int count, npage; + int offset, end; __le64 *mtts; - int offset; - int count; - int npage; u64 addr; - int end; int i; - /* if hopnum is 0, buffer cannot store BAs, so skip write mtt */ - if (!region->hopnum) - return 0; - offset = region->offset; end = offset + region->count; npage = 0; - while (offset < end) { + while (offset < end && npage < max_count) { + count = 0; mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, offset, &count, NULL); if (!mtts) return -ENOBUFS; - for (i = 0; i < count; i++) { + for (i = 0; i < count && npage < max_count; i++) { if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) addr = to_hr_hw_page_addr(pages[npage]); else @@ -666,7 +627,7 @@ static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, offset += count; } - return 0; + return npage; } static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr) @@ -729,25 +690,15 @@ static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) } static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - struct hns_roce_buf_attr 
*buf_attr, bool is_direct, + struct hns_roce_buf_attr *buf_attr, struct ib_udata *udata, unsigned long user_addr) { struct ib_device *ibdev = &hr_dev->ib_dev; - unsigned int best_pg_shift; - int all_pg_count = 0; size_t total_size; - int ret; total_size = mtr_bufs_size(buf_attr); - if (total_size < 1) { - ibdev_err(ibdev, "failed to check mtr size\n."); - return -EINVAL; - } if (udata) { - unsigned long pgsz_bitmap; - unsigned long page_size; - mtr->kmem = NULL; mtr->umem = ib_umem_get(ibdev, user_addr, total_size, buf_attr->user_access); @@ -756,76 +707,67 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, PTR_ERR(mtr->umem)); return -ENOMEM; } - if (buf_attr->fixed_page) - pgsz_bitmap = 1 << buf_attr->page_shift; - else - pgsz_bitmap = GENMASK(buf_attr->page_shift, PAGE_SHIFT); - - page_size = ib_umem_find_best_pgsz(mtr->umem, pgsz_bitmap, - user_addr); - if (!page_size) - return -EINVAL; - best_pg_shift = order_base_2(page_size); - all_pg_count = ib_umem_num_dma_blocks(mtr->umem, page_size); - ret = 0; } else { mtr->umem = NULL; - mtr->kmem = - hns_roce_buf_alloc(hr_dev, total_size, - buf_attr->page_shift, - is_direct ? HNS_ROCE_BUF_DIRECT : 0); + mtr->kmem = hns_roce_buf_alloc(hr_dev, total_size, + buf_attr->page_shift, + mtr->hem_cfg.is_direct ? + HNS_ROCE_BUF_DIRECT : 0); if (IS_ERR(mtr->kmem)) { ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n", PTR_ERR(mtr->kmem)); return PTR_ERR(mtr->kmem); } - - best_pg_shift = buf_attr->page_shift; - all_pg_count = mtr->kmem->npages; - } - - /* must bigger than minimum hardware page shift */ - if (best_pg_shift < HNS_HW_PAGE_SHIFT || all_pg_count < 1) { - ret = -EINVAL; - ibdev_err(ibdev, - "failed to check mtr, page shift = %u count = %d.\n", - best_pg_shift, all_pg_count); - goto err_alloc_mem; } - mtr->hem_cfg.buf_pg_shift = best_pg_shift; - mtr->hem_cfg.buf_pg_count = all_pg_count; - return 0; -err_alloc_mem: - mtr_free_bufs(hr_dev, mtr); - return ret; } -static int mtr_get_pages(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t *pages, int count, unsigned int page_shift) +static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + int page_count, unsigned int page_shift) { struct ib_device *ibdev = &hr_dev->ib_dev; + dma_addr_t *pages; int npage; - int err; + int ret; + + /* alloc a tmp array to store buffer's dma address */ + pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL); + if (!pages) + return -ENOMEM; if (mtr->umem) - npage = hns_roce_get_umem_bufs(hr_dev, pages, count, 0, + npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count, 0, mtr->umem, page_shift); else - npage = hns_roce_get_kmem_bufs(hr_dev, pages, count, 0, + npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count, 0, mtr->kmem); + if (npage != page_count) { + ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", npage, + page_count); + ret = -ENOBUFS; + goto err_alloc_list; + } + if (mtr->hem_cfg.is_direct && npage > 1) { - err = mtr_check_direct_pages(pages, npage, page_shift); - if (err) { - ibdev_err(ibdev, "Failed to check %s direct page-%d\n", - mtr->umem ? "user" : "kernel", err); - npage = err; + ret = mtr_check_direct_pages(pages, npage, page_shift); + if (ret) { + ibdev_err(ibdev, "failed to check %s mtr, idx = %d.\n", + mtr->umem ? 
"user" : "kernel", ret); + ret = -ENOBUFS; + goto err_alloc_list; } } - return npage; + ret = hns_roce_mtr_map(hr_dev, mtr, pages, page_count); + if (ret) + ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret); + +err_alloc_list: + kvfree(pages); + + return ret; } int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, @@ -833,8 +775,8 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, { struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_region *r; - unsigned int i; - int err; + unsigned int i, mapped_cnt; + int ret; /* * Only use the first page address as root ba when hopnum is 0, this @@ -845,26 +787,42 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, return 0; } - for (i = 0; i < mtr->hem_cfg.region_count; i++) { + for (i = 0, mapped_cnt = 0; i < mtr->hem_cfg.region_count && + mapped_cnt < page_cnt; i++) { r = &mtr->hem_cfg.region[i]; + /* if hopnum is 0, no need to map pages in this region */ + if (!r->hopnum) { + mapped_cnt += r->count; + continue; + } + if (r->offset + r->count > page_cnt) { - err = -EINVAL; + ret = -EINVAL; ibdev_err(ibdev, "failed to check mtr%u end %u + %u, max %u.\n", i, r->offset, r->count, page_cnt); - return err; + return ret; } - err = mtr_map_region(hr_dev, mtr, &pages[r->offset], r); - if (err) { + ret = mtr_map_region(hr_dev, mtr, r, &pages[r->offset], + page_cnt - mapped_cnt); + if (ret < 0) { ibdev_err(ibdev, "failed to map mtr%u offset %u, ret = %d.\n", - i, r->offset, err); - return err; + i, r->offset, ret); + return ret; } + mapped_cnt += ret; + ret = 0; } - return 0; + if (mapped_cnt < page_cnt) { + ret = -ENOBUFS; + ibdev_err(ibdev, "failed to map mtr pages count: %u < %u.\n", + mapped_cnt, page_cnt); + } + + return ret; } int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, @@ -928,68 +886,92 @@ done: static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev, struct hns_roce_buf_attr *attr, struct hns_roce_hem_cfg *cfg, - unsigned int *buf_page_shift) + unsigned int *buf_page_shift, int unalinged_size) { struct hns_roce_buf_region *r; + int first_region_padding; + int page_cnt, region_cnt; unsigned int page_shift; - int page_cnt = 0; size_t buf_size; - int region_cnt; + /* If mtt is disabled, all pages must be within a continuous range */ + cfg->is_direct = !mtr_has_mtt(attr); + buf_size = mtr_bufs_size(attr); if (cfg->is_direct) { - buf_size = cfg->buf_pg_count << cfg->buf_pg_shift; - page_cnt = DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE); - /* - * When HEM buffer use level-0 addressing, the page size equals - * the buffer size, and the the page size = 4K * 2^N. + /* When HEM buffer uses 0-level addressing, the page size is + * equal to the whole buffer size, and we split the buffer into + * small pages which is used to check whether the adjacent + * units are in the continuous space and its size is fixed to + * 4K based on hns ROCEE's requirement. */ - cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT + order_base_2(page_cnt); - if (attr->region_count > 1) { - cfg->buf_pg_count = page_cnt; - page_shift = HNS_HW_PAGE_SHIFT; - } else { - cfg->buf_pg_count = 1; - page_shift = cfg->buf_pg_shift; - if (buf_size != 1 << page_shift) { - ibdev_err(&hr_dev->ib_dev, - "failed to check direct size %zu shift %d.\n", - buf_size, page_shift); - return -EINVAL; - } - } + page_shift = HNS_HW_PAGE_SHIFT; + + /* The ROCEE requires the page size to be 4K * 2 ^ N. 
*/ + cfg->buf_pg_count = 1; + cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT + + order_base_2(DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE)); + first_region_padding = 0; } else { - page_shift = cfg->buf_pg_shift; + page_shift = attr->page_shift; + cfg->buf_pg_count = DIV_ROUND_UP(buf_size + unalinged_size, + 1 << page_shift); + cfg->buf_pg_shift = page_shift; + first_region_padding = unalinged_size; } - /* convert buffer size to page index and page count */ - for (page_cnt = 0, region_cnt = 0; page_cnt < cfg->buf_pg_count && - region_cnt < attr->region_count && + /* Convert buffer size to page index and page count for each region and + * the buffer's offset needs to be appended to the first region. + */ + for (page_cnt = 0, region_cnt = 0; region_cnt < attr->region_count && region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) { r = &cfg->region[region_cnt]; r->offset = page_cnt; - buf_size = hr_hw_page_align(attr->region[region_cnt].size); + buf_size = hr_hw_page_align(attr->region[region_cnt].size + + first_region_padding); r->count = DIV_ROUND_UP(buf_size, 1 << page_shift); + first_region_padding = 0; page_cnt += r->count; r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum, r->count); } - if (region_cnt < 1) { - ibdev_err(&hr_dev->ib_dev, - "failed to check mtr region count, pages = %d.\n", - cfg->buf_pg_count); - return -ENOBUFS; - } - cfg->region_count = region_cnt; *buf_page_shift = page_shift; return page_cnt; } +static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + unsigned int ba_page_shift) +{ + struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; + int ret; + + hns_roce_hem_list_init(&mtr->hem_list); + if (!cfg->is_direct) { + ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, + cfg->region, cfg->region_count, + ba_page_shift); + if (ret) + return ret; + cfg->root_ba = mtr->hem_list.root_ba; + cfg->ba_pg_shift = ba_page_shift; + } else { + cfg->ba_pg_shift = cfg->buf_pg_shift; + } + + return 0; +} + +static void mtr_free_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) +{ + hns_roce_hem_list_release(hr_dev, &mtr->hem_list); +} + /** * hns_roce_mtr_create - Create hns memory translate region. 
* + * @hr_dev: RoCE device struct pointer * @mtr: memory translate region * @buf_attr: buffer attribute for creating mtr * @ba_page_shift: page shift for multi-hop base address table @@ -1001,95 +983,51 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, unsigned int ba_page_shift, struct ib_udata *udata, unsigned long user_addr) { - struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; struct ib_device *ibdev = &hr_dev->ib_dev; unsigned int buf_page_shift = 0; - dma_addr_t *pages = NULL; - int all_pg_cnt; - int get_pg_cnt; - int ret = 0; - - /* if disable mtt, all pages must in a continuous address range */ - cfg->is_direct = !mtr_has_mtt(buf_attr); - - /* if buffer only need mtt, just init the hem cfg */ - if (buf_attr->mtt_only) { - cfg->buf_pg_shift = buf_attr->page_shift; - cfg->buf_pg_count = mtr_bufs_size(buf_attr) >> - buf_attr->page_shift; - mtr->umem = NULL; - mtr->kmem = NULL; - } else { - ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, cfg->is_direct, - udata, user_addr); - if (ret) { - ibdev_err(ibdev, - "failed to alloc mtr bufs, ret = %d.\n", ret); - return ret; - } - } + int buf_page_cnt; + int ret; - all_pg_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, cfg, &buf_page_shift); - if (all_pg_cnt < 1) { - ret = -ENOBUFS; - ibdev_err(ibdev, "failed to init mtr buf cfg.\n"); - goto err_alloc_bufs; + buf_page_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, &mtr->hem_cfg, + &buf_page_shift, + udata ? user_addr & ~PAGE_MASK : 0); + if (buf_page_cnt < 1 || buf_page_shift < HNS_HW_PAGE_SHIFT) { + ibdev_err(ibdev, "failed to init mtr cfg, count %d shift %d.\n", + buf_page_cnt, buf_page_shift); + return -EINVAL; } - hns_roce_hem_list_init(&mtr->hem_list); - if (!cfg->is_direct) { - ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, - cfg->region, cfg->region_count, - ba_page_shift); - if (ret) { - ibdev_err(ibdev, "failed to request mtr hem, ret = %d.\n", - ret); - goto err_alloc_bufs; - } - cfg->root_ba = mtr->hem_list.root_ba; - cfg->ba_pg_shift = ba_page_shift; - } else { - cfg->ba_pg_shift = cfg->buf_pg_shift; + ret = mtr_alloc_mtt(hr_dev, mtr, ba_page_shift); + if (ret) { + ibdev_err(ibdev, "failed to alloc mtr mtt, ret = %d.\n", ret); + return ret; } - /* no buffer to map */ - if (buf_attr->mtt_only) + /* The caller has its own buffer list and invokes the hns_roce_mtr_map() + * to finish the MTT configuration. 
+ */ + if (buf_attr->mtt_only) { + mtr->umem = NULL; + mtr->kmem = NULL; return 0; - - /* alloc a tmp array to store buffer's dma address */ - pages = kvcalloc(all_pg_cnt, sizeof(dma_addr_t), GFP_KERNEL); - if (!pages) { - ret = -ENOMEM; - ibdev_err(ibdev, "failed to alloc mtr page list %d.\n", - all_pg_cnt); - goto err_alloc_hem_list; - } - - get_pg_cnt = mtr_get_pages(hr_dev, mtr, pages, all_pg_cnt, - buf_page_shift); - if (get_pg_cnt != all_pg_cnt) { - ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", - get_pg_cnt, all_pg_cnt); - ret = -ENOBUFS; - goto err_alloc_page_list; } - /* write buffer's dma address to BA table */ - ret = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt); + ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, udata, user_addr); if (ret) { - ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret); - goto err_alloc_page_list; + ibdev_err(ibdev, "failed to alloc mtr bufs, ret = %d.\n", ret); + goto err_alloc_mtt; } - /* drop tmp array */ - kvfree(pages); - return 0; -err_alloc_page_list: - kvfree(pages); -err_alloc_hem_list: - hns_roce_hem_list_release(hr_dev, &mtr->hem_list); -err_alloc_bufs: + /* Write buffer's dma address to MTT */ + ret = mtr_map_bufs(hr_dev, mtr, buf_page_cnt, buf_page_shift); + if (ret) + ibdev_err(ibdev, "failed to map mtr bufs, ret = %d.\n", ret); + else + return 0; + mtr_free_bufs(hr_dev, mtr); +err_alloc_mtt: + mtr_free_mtt(hr_dev, mtr); return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 1116371adf74..004aca9086ab 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -413,9 +413,32 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) mutex_unlock(&hr_dev->qp_table.bank_mutex); } +static u32 proc_rq_sge(struct hns_roce_dev *dev, struct hns_roce_qp *hr_qp, + bool user) +{ + u32 max_sge = dev->caps.max_rq_sg; + + if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return max_sge; + + /* Reserve SGEs only for HIP08 in kernel; The userspace driver will + * calculate number of max_sge with reserved SGEs when allocating wqe + * buf, so there is no need to do this again in kernel. But the number + * may exceed the capacity of SGEs recorded in the firmware, so the + * kernel driver should just adapt the value accordingly. 
+ */ + if (user) + max_sge = roundup_pow_of_two(max_sge + 1); + else + hr_qp->rq.rsv_sge = 1; + + return max_sge; +} + static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, - struct hns_roce_qp *hr_qp, int has_rq) + struct hns_roce_qp *hr_qp, int has_rq, bool user) { + u32 max_sge = proc_rq_sge(hr_dev, hr_qp, user); u32 cnt; /* If srq exist, set zero for relative number of rq */ @@ -431,8 +454,9 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, /* Check the validity of QP support capacity */ if (!cap->max_recv_wr || cap->max_recv_wr > hr_dev->caps.max_wqes || - cap->max_recv_sge > hr_dev->caps.max_rq_sg) { - ibdev_err(&hr_dev->ib_dev, "RQ config error, depth=%u, sge=%d\n", + cap->max_recv_sge > max_sge) { + ibdev_err(&hr_dev->ib_dev, + "RQ config error, depth = %u, sge = %u\n", cap->max_recv_wr, cap->max_recv_sge); return -EINVAL; } @@ -444,7 +468,8 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, return -EINVAL; } - hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge)); + hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge) + + hr_qp->rq.rsv_sge); if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE) hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz); @@ -459,7 +484,7 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, hr_qp->rq_inl_buf.wqe_cnt = 0; cap->max_recv_wr = cnt; - cap->max_recv_sge = hr_qp->rq.max_gs; + cap->max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge; return 0; } @@ -599,7 +624,6 @@ static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev, return -EINVAL; buf_attr->page_shift = HNS_HW_PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz; - buf_attr->fixed_page = true; buf_attr->region_count = idx; return 0; @@ -919,7 +943,7 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR; ret = set_rq_size(hr_dev, &init_attr->cap, hr_qp, - hns_roce_qp_has_rq(init_attr)); + hns_roce_qp_has_rq(init_attr), !!udata); if (ret) { ibdev_err(ibdev, "failed to set user RQ size, ret = %d.\n", ret); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index c4ae57e4173a..d5a6de0e7095 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -3,6 +3,7 @@ * Copyright (c) 2018 Hisilicon Limited. 
*/ +#include <linux/pci.h> #include <rdma/ib_umem.h> #include "hns_roce_device.h" #include "hns_roce_cmd.h" @@ -76,40 +77,16 @@ static int hns_roce_hw_destroy_srq(struct hns_roce_dev *dev, HNS_ROCE_CMD_TIMEOUT_MSECS); } -static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, - u32 pdn, u32 cqn, u16 xrcd, u64 db_rec_addr) +static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_cmd_mailbox *mailbox; - u64 mtts_wqe[MTT_MIN_COUNT] = { 0 }; - u64 mtts_idx[MTT_MIN_COUNT] = { 0 }; - dma_addr_t dma_handle_wqe = 0; - dma_addr_t dma_handle_idx = 0; int ret; - /* Get the physical address of srq buf */ - ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe, - ARRAY_SIZE(mtts_wqe), &dma_handle_wqe); - if (ret < 1) { - ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n", - ret); - return -ENOBUFS; - } - - /* Get physical address of idx que buf */ - ret = hns_roce_mtr_find(hr_dev, &srq->idx_que.mtr, 0, mtts_idx, - ARRAY_SIZE(mtts_idx), &dma_handle_idx); - if (ret < 1) { - ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n", - ret); - return -ENOBUFS; - } - ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn); if (ret) { - ibdev_err(ibdev, - "failed to alloc SRQ number, ret = %d.\n", ret); + ibdev_err(ibdev, "failed to alloc SRQ number.\n"); return -ENOMEM; } @@ -127,34 +104,36 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR_OR_NULL(mailbox)) { - ret = -ENOMEM; ibdev_err(ibdev, "failed to alloc mailbox for SRQC.\n"); + ret = -ENOMEM; goto err_xa; } - hr_dev->hw->write_srqc(hr_dev, srq, pdn, xrcd, cqn, mailbox->buf, - mtts_wqe, mtts_idx, dma_handle_wqe, - dma_handle_idx); + ret = hr_dev->hw->write_srqc(srq, mailbox->buf); + if (ret) { + ibdev_err(ibdev, "failed to write SRQC.\n"); + goto err_mbox; + } ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn); - hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { ibdev_err(ibdev, "failed to config SRQC, ret = %d.\n", ret); - goto err_xa; + goto err_mbox; } - atomic_set(&srq->refcount, 1); - init_completion(&srq->free); - return ret; + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + + return 0; +err_mbox: + hns_roce_free_cmd_mailbox(hr_dev, mailbox); err_xa: xa_erase(&srq_table->xa, srq->srqn); - err_put: hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn); - err_out: hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR); + return ret; } @@ -178,46 +157,13 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR); } -static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, - struct ib_udata *udata, unsigned long addr) -{ - struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_buf_attr buf_attr = {}; - int err; - - srq->wqe_shift = ilog2(roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE, - HNS_ROCE_SGE_SIZE * - srq->max_gs))); - - buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; - buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt, - srq->wqe_shift); - buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num; - buf_attr.region_count = 1; - buf_attr.fixed_page = true; - - err = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr, - hr_dev->caps.srqwqe_ba_pg_sz + - HNS_HW_PAGE_SHIFT, udata, addr); - if (err) - 
ibdev_err(ibdev, - "failed to alloc SRQ buf mtr, ret = %d.\n", err); - - return err; -} - -static void free_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) -{ - hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr); -} - static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, struct ib_udata *udata, unsigned long addr) { struct hns_roce_idx_que *idx_que = &srq->idx_que; struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_attr buf_attr = {}; - int err; + int ret; srq->idx_que.entry_shift = ilog2(HNS_ROCE_IDX_QUE_ENTRY_SZ); @@ -226,31 +172,33 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, srq->idx_que.entry_shift); buf_attr.region[0].hopnum = hr_dev->caps.idx_hop_num; buf_attr.region_count = 1; - buf_attr.fixed_page = true; - err = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr, + ret = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr, hr_dev->caps.idx_ba_pg_sz + HNS_HW_PAGE_SHIFT, udata, addr); - if (err) { + if (ret) { ibdev_err(ibdev, - "failed to alloc SRQ idx mtr, ret = %d.\n", err); - return err; + "failed to alloc SRQ idx mtr, ret = %d.\n", ret); + return ret; } if (!udata) { idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL); if (!idx_que->bitmap) { ibdev_err(ibdev, "failed to alloc SRQ idx bitmap.\n"); - err = -ENOMEM; + ret = -ENOMEM; goto err_idx_mtr; } } + idx_que->head = 0; + idx_que->tail = 0; + return 0; err_idx_mtr: hns_roce_mtr_destroy(hr_dev, &idx_que->mtr); - return err; + return ret; } static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) @@ -262,10 +210,42 @@ static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) hns_roce_mtr_destroy(hr_dev, &idx_que->mtr); } +static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev, + struct hns_roce_srq *srq, + struct ib_udata *udata, unsigned long addr) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_buf_attr buf_attr = {}; + int ret; + + srq->wqe_shift = ilog2(roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE, + HNS_ROCE_SGE_SIZE * + srq->max_gs))); + + buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; + buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt, + srq->wqe_shift); + buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num; + buf_attr.region_count = 1; + + ret = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr, + hr_dev->caps.srqwqe_ba_pg_sz + + HNS_HW_PAGE_SHIFT, udata, addr); + if (ret) + ibdev_err(ibdev, + "failed to alloc SRQ buf mtr, ret = %d.\n", ret); + + return ret; +} + +static void free_srq_wqe_buf(struct hns_roce_dev *hr_dev, + struct hns_roce_srq *srq) +{ + hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr); +} + static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { - srq->head = 0; - srq->tail = srq->wqe_cnt - 1; srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL); if (!srq->wrid) return -ENOMEM; @@ -279,96 +259,171 @@ static void free_srq_wrid(struct hns_roce_srq *srq) srq->wrid = NULL; } -int hns_roce_create_srq(struct ib_srq *ib_srq, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata) +static u32 proc_srq_sge(struct hns_roce_dev *dev, struct hns_roce_srq *hr_srq, + bool user) +{ + u32 max_sge = dev->caps.max_srq_sges; + + if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return max_sge; + + /* Reserve SGEs only for HIP08 in kernel; The userspace driver will + * calculate number of max_sge with reserved SGEs when allocating wqe + * buf, so there is no need to do this 
again in kernel. But the number + * may exceed the capacity of SGEs recorded in the firmware, so the + * kernel driver should just adapt the value accordingly. + */ + if (user) + max_sge = roundup_pow_of_two(max_sge + 1); + else + hr_srq->rsv_sge = 1; + + return max_sge; +} + +static int set_srq_basic_param(struct hns_roce_srq *srq, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device); + struct ib_srq_attr *attr = &init_attr->attr; + u32 max_sge; + + max_sge = proc_srq_sge(hr_dev, srq, !!udata); + if (attr->max_wr > hr_dev->caps.max_srq_wrs || + attr->max_sge > max_sge) { + ibdev_err(&hr_dev->ib_dev, + "invalid SRQ attr, depth = %u, sge = %u.\n", + attr->max_wr, attr->max_sge); + return -EINVAL; + } + + attr->max_wr = max_t(u32, attr->max_wr, HNS_ROCE_MIN_SRQ_WQE_NUM); + srq->wqe_cnt = roundup_pow_of_two(attr->max_wr); + srq->max_gs = roundup_pow_of_two(attr->max_sge + srq->rsv_sge); + + attr->max_wr = srq->wqe_cnt; + attr->max_sge = srq->max_gs - srq->rsv_sge; + attr->srq_limit = 0; + + return 0; +} + +static void set_srq_ext_param(struct hns_roce_srq *srq, + struct ib_srq_init_attr *init_attr) +{ + srq->cqn = ib_srq_has_cq(init_attr->srq_type) ? + to_hr_cq(init_attr->ext.cq)->cqn : 0; +} + +static int set_srq_param(struct hns_roce_srq *srq, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) { - struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device); - struct hns_roce_ib_create_srq_resp resp = {}; - struct hns_roce_srq *srq = to_hr_srq(ib_srq); - struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_ib_create_srq ucmd = {}; int ret; - u32 cqn; - if (init_attr->srq_type != IB_SRQT_BASIC && - init_attr->srq_type != IB_SRQT_XRC) - return -EOPNOTSUPP; + ret = set_srq_basic_param(srq, init_attr, udata); + if (ret) + return ret; - /* Check the actual SRQ wqe and SRQ sge num */ - if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || - init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) - return -EINVAL; + set_srq_ext_param(srq, init_attr); - mutex_init(&srq->mutex); - spin_lock_init(&srq->lock); + return 0; +} - srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1); - srq->max_gs = init_attr->attr.max_sge; +static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, + struct ib_udata *udata) +{ + struct hns_roce_ib_create_srq ucmd = {}; + int ret; if (udata) { ret = ib_copy_from_udata(&ucmd, udata, min(udata->inlen, sizeof(ucmd))); if (ret) { - ibdev_err(ibdev, "failed to copy SRQ udata, ret = %d.\n", + ibdev_err(&hr_dev->ib_dev, + "failed to copy SRQ udata, ret = %d.\n", ret); return ret; } } - ret = alloc_srq_buf(hr_dev, srq, udata, ucmd.buf_addr); - if (ret) { - ibdev_err(ibdev, - "failed to alloc SRQ buffer, ret = %d.\n", ret); + ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr); + if (ret) return ret; - } - ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr); - if (ret) { - ibdev_err(ibdev, "failed to alloc SRQ idx, ret = %d.\n", ret); - goto err_buf_alloc; - } + ret = alloc_srq_wqe_buf(hr_dev, srq, udata, ucmd.buf_addr); + if (ret) + goto err_idx; if (!udata) { ret = alloc_srq_wrid(hr_dev, srq); - if (ret) { - ibdev_err(ibdev, "failed to alloc SRQ wrid, ret = %d.\n", - ret); - goto err_idx_alloc; - } + if (ret) + goto err_wqe_buf; } - cqn = ib_srq_has_cq(init_attr->srq_type) ? 
- to_hr_cq(init_attr->ext.cq)->cqn : 0; - srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG; + return 0; - ret = alloc_srqc(hr_dev, srq, to_hr_pd(ib_srq->pd)->pdn, cqn, 0, 0); - if (ret) { - ibdev_err(ibdev, - "failed to alloc SRQ context, ret = %d.\n", ret); - goto err_wrid_alloc; - } +err_wqe_buf: + free_srq_wqe_buf(hr_dev, srq); +err_idx: + free_srq_idx(hr_dev, srq); - srq->event = hns_roce_ib_srq_event; - resp.srqn = srq->srqn; + return ret; +} + +static void free_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) +{ + free_srq_wrid(srq); + free_srq_wqe_buf(hr_dev, srq); + free_srq_idx(hr_dev, srq); +} + +int hns_roce_create_srq(struct ib_srq *ib_srq, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device); + struct hns_roce_ib_create_srq_resp resp = {}; + struct hns_roce_srq *srq = to_hr_srq(ib_srq); + int ret; + + mutex_init(&srq->mutex); + spin_lock_init(&srq->lock); + + ret = set_srq_param(srq, init_attr, udata); + if (ret) + return ret; + + ret = alloc_srq_buf(hr_dev, srq, udata); + if (ret) + return ret; + + ret = alloc_srqc(hr_dev, srq); + if (ret) + goto err_srq_buf; if (udata) { - ret = ib_copy_to_udata(udata, &resp, - min(udata->outlen, sizeof(resp))); - if (ret) - goto err_srqc_alloc; + resp.srqn = srq->srqn; + if (ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp)))) { + ret = -EFAULT; + goto err_srqc; + } } + srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG; + srq->event = hns_roce_ib_srq_event; + atomic_set(&srq->refcount, 1); + init_completion(&srq->free); + return 0; -err_srqc_alloc: +err_srqc: free_srqc(hr_dev, srq); -err_wrid_alloc: - free_srq_wrid(srq); -err_idx_alloc: - free_srq_idx(hr_dev, srq); -err_buf_alloc: +err_srq_buf: free_srq_buf(hr_dev, srq); + return ret; } @@ -378,8 +433,6 @@ int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) struct hns_roce_srq *srq = to_hr_srq(ibsrq); free_srqc(hr_dev, srq); - free_srq_idx(hr_dev, srq); - free_srq_wrid(srq); free_srq_buf(hr_dev, srq); return 0; } diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 9acc0ecc9a43..ac65c8237b2e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -70,7 +70,7 @@ static void i40iw_disconnect_worker(struct work_struct *work); /** * i40iw_free_sqbuf - put back puda buffer if refcount = 0 * @vsi: pointer to vsi structure - * @buf: puda buffer to free + * @bufp: puda buffer to free */ void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp) { @@ -729,6 +729,7 @@ static int i40iw_handle_tcp_options(struct i40iw_cm_node *cm_node, /** * i40iw_build_mpa_v1 - build a MPA V1 frame * @cm_node: connection's node + * @start_addr: MPA frame start address * @mpa_key: to do read0 or write0 */ static void i40iw_build_mpa_v1(struct i40iw_cm_node *cm_node, @@ -1040,7 +1041,7 @@ negotiate_done: /** * i40iw_schedule_cm_timer - * @@cm_node: connection's node + * @cm_node: connection's node * @sqbuf: buffer to send * @type: if it is send or close * @send_retrans: if rexmits to be done @@ -1205,7 +1206,7 @@ static void i40iw_build_timer_list(struct list_head *timer_list, /** * i40iw_cm_timer_tick - system's timer expired callback - * @pass: Pointing to cm_core + * @t: Timer instance to fetch the cm_core pointer from */ static void i40iw_cm_timer_tick(struct timer_list *t) { @@ -1463,6 +1464,7 @@ struct i40iw_cm_node *i40iw_find_node(struct i40iw_cm_core *cm_core, * @cm_core: cm's core * @dst_port: listener tcp 
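The rewritten hns_roce_create_srq()/alloc_srq_buf() above acquires resources in a fixed order (idx queue, WQE buffer, wrid array, then the SRQ context) and unwinds in reverse through goto labels on failure. A toy user-space sketch of that allocate-forward/unwind-backward shape, with plain allocations standing in for the mtr and context setup (all names invented):

	#include <errno.h>
	#include <stdlib.h>

	struct toy_srq {
		void *idx_que;
		void *wqe_buf;
		void *wrid;
	};

	static int toy_create_srq(struct toy_srq *srq)
	{
		srq->idx_que = malloc(64);
		if (!srq->idx_que)
			return -ENOMEM;

		srq->wqe_buf = malloc(64);
		if (!srq->wqe_buf)
			goto err_idx;

		srq->wrid = malloc(64);
		if (!srq->wrid)
			goto err_wqe_buf;

		return 0;

	err_wqe_buf:
		free(srq->wqe_buf);
	err_idx:
		free(srq->idx_que);
		return -ENOMEM;
	}

	static void toy_destroy_srq(struct toy_srq *srq)
	{
		/* teardown releases in the reverse order of allocation */
		free(srq->wrid);
		free(srq->wqe_buf);
		free(srq->idx_que);
	}

	int main(void)
	{
		struct toy_srq srq;

		if (toy_create_srq(&srq))
			return 1;
		toy_destroy_srq(&srq);
		return 0;
	}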
port num * @dst_addr: listener ip addr + * @vlan_id: vlan id for the given address * @listener_state: state to match with listen node's */ static struct i40iw_cm_listener *i40iw_find_listener( @@ -1521,7 +1523,7 @@ static void i40iw_add_hte_node(struct i40iw_cm_core *cm_core, /** * i40iw_find_port - find port that matches reference port * @hte: ptr to accelerated or non-accelerated list - * @accelerated_list: flag for accelerated vs non-accelerated list + * @port: port number to locate */ static bool i40iw_find_port(struct list_head *hte, u16 port) { @@ -1834,6 +1836,7 @@ exit: /** * i40iw_dec_refcnt_listen - delete listener and associated cm nodes * @cm_core: cm's core + * @listener: passive connection's listener * @free_hanging_nodes: to free associated cm_nodes * @apbvt_del: flag to delete the apbvt */ @@ -2029,7 +2032,7 @@ static int i40iw_addr_resolve_neigh(struct i40iw_device *iwdev, return rc; } -/** +/* * i40iw_get_dst_ipv6 */ static struct dst_entry *i40iw_get_dst_ipv6(struct sockaddr_in6 *src_addr, @@ -2051,7 +2054,8 @@ static struct dst_entry *i40iw_get_dst_ipv6(struct sockaddr_in6 *src_addr, /** * i40iw_addr_resolve_neigh_ipv6 - resolve neighbor ipv6 address * @iwdev: iwarp device structure - * @dst_ip: remote ip address + * @src: source ip address + * @dest: remote ip address * @arpindex: if there is an arp entry */ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev, @@ -3004,7 +3008,7 @@ static struct i40iw_cm_node *i40iw_create_cm_node( /** * i40iw_cm_reject - reject and teardown a connection * @cm_node: connection's node - * @pdate: ptr to private data for reject + * @pdata: ptr to private data for reject * @plen: size of private data */ static int i40iw_cm_reject(struct i40iw_cm_node *cm_node, const void *pdata, u8 plen) @@ -4302,7 +4306,7 @@ set_qhash: * i40iw_cm_teardown_connections - teardown QPs * @iwdev: device pointer * @ipaddr: Pointer to IPv4 or IPv6 address - * @ipv4: flag indicating IPv4 when true + * @nfo: cm info node * @disconnect_all: flag indicating disconnect all QPs * teardown QPs where source or destination addr matches ip addr */ @@ -4358,6 +4362,7 @@ void i40iw_cm_teardown_connections(struct i40iw_device *iwdev, u32 *ipaddr, /** * i40iw_ifdown_notify - process an ifdown on an interface * @iwdev: device pointer + * @netdev: network interface device structure * @ipaddr: Pointer to IPv4 or IPv6 address * @ipv4: flag indicating IPv4 when true * @ifup: flag indicating interface up when true diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index c943d491b72b..eaea5d545eb8 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -181,7 +181,7 @@ static enum i40iw_status_code i40iw_sc_parse_fpm_commit_buf( * i40iw_sc_decode_fpm_query() - Decode a 64 bit value into max count and size * @buf: ptr to fpm query buffer * @buf_idx: index into buf - * @info: ptr to i40iw_hmc_obj_info struct + * @obj_info: ptr to i40iw_hmc_obj_info struct * @rsrc_idx: resource index into info * * Decode a 64 bit value from fpm query buffer into max count and size @@ -205,7 +205,7 @@ static u64 i40iw_sc_decode_fpm_query(u64 *buf, /** * i40iw_sc_parse_fpm_query_buf() - parses fpm query buffer * @buf: ptr to fpm query buffer - * @info: ptr to i40iw_hmc_obj_info struct + * @hmc_info: ptr to i40iw_hmc_obj_info struct * @hmc_fpm_misc: ptr to fpm data * * parses fpm query buffer and copy max_cnt and @@ -775,7 +775,7 @@ static enum i40iw_status_code i40iw_sc_ccq_get_cqe_info( * 
i40iw_sc_poll_for_cqp_op_done - Waits for last write to complete in CQP SQ * @cqp: struct for cqp hw * @op_code: cqp opcode for completion - * @info: completion q entry to return + * @compl_info: completion q entry to return */ static enum i40iw_status_code i40iw_sc_poll_for_cqp_op_done( struct i40iw_sc_cqp *cqp, @@ -933,7 +933,7 @@ static enum i40iw_status_code i40iw_sc_commit_fpm_values_done(struct i40iw_sc_cq * @cqp: struct for cqp hw * @scratch: u64 saved to be used during cqp completion * @hmc_fn_id: hmc function id - * @commit_fpm_mem; Memory for fpm values + * @commit_fpm_mem: Memory for fpm values * @post_sq: flag for cqp db to ring * @wait_type: poll ccq or cqp registers for cqp completion */ @@ -1026,7 +1026,7 @@ i40iw_sc_query_rdma_features(struct i40iw_sc_cqp *cqp, /** * i40iw_get_rdma_features - get RDMA features - * @dev - sc device struct + * @dev: sc device struct */ enum i40iw_status_code i40iw_get_rdma_features(struct i40iw_sc_dev *dev) { @@ -1456,7 +1456,7 @@ static enum i40iw_status_code i40iw_sc_add_local_mac_ipaddr_entry( * @cqp: struct for cqp hw * @scratch: u64 saved to be used during cqp completion * @entry_idx: index of mac entry - * @ ignore_ref_count: to force mac adde delete + * @ignore_ref_count: to force mac adde delete * @post_sq: flag for cqp db to ring */ static enum i40iw_status_code i40iw_sc_del_local_mac_ipaddr_entry( @@ -2304,7 +2304,7 @@ static enum i40iw_status_code i40iw_sc_cq_destroy(struct i40iw_sc_cq *cq, * i40iw_sc_cq_modify - modify a Completion Queue * @cq: cq struct * @info: modification info struct - * @scratch: + * @scratch: u64 saved to be used during cqp completion * @post_sq: flag to post to sq */ static enum i40iw_status_code i40iw_sc_cq_modify(struct i40iw_sc_cq *cq, @@ -3673,7 +3673,7 @@ static enum i40iw_status_code i40iw_sc_configure_iw_fpm(struct i40iw_sc_dev *dev /** * cqp_sds_wqe_fill - fill cqp wqe doe sd * @cqp: struct for cqp hw - * @info; sd info for wqe + * @info: sd info for wqe * @scratch: u64 saved to be used during cqp completion */ static enum i40iw_status_code cqp_sds_wqe_fill(struct i40iw_sc_cqp *cqp, @@ -4884,7 +4884,7 @@ void i40iw_hw_stats_init(struct i40iw_vsi_pestat *stats, u8 fcn_idx, bool is_pf) /** * i40iw_hw_stats_read_32 - Read 32-bit HW stats counters and accommodates for roll-overs. 
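Most of the i40iw hunks above and below are kernel-doc repairs: every "@name:" line must name a real parameter of the function, use a colon rather than a semicolon, leave no space after the "@", and plain "/*" is used for comments that are not kernel-doc. For reference, a hypothetical function documented in that shape (the function itself is invented for the example and is not part of i40iw):

	#include <stdint.h>

	/**
	 * example_ring_db - publish a new producer index (illustration only)
	 * @db_addr: pointer to the mapped doorbell register
	 * @tail: producer index to publish
	 *
	 * Return: 0 on success, negative errno on failure.
	 */
	static int example_ring_db(volatile uint32_t *db_addr, uint32_t tail)
	{
		*db_addr = tail;
		return 0;
	}

	int main(void)
	{
		uint32_t db = 0;

		return example_ring_db(&db, 5);
	}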
- * @stat: pestat struct + * @stats: pestat struct * @index: index in HW stats table which contains offset reg-addr * @value: hw stats value */ diff --git a/drivers/infiniband/hw/i40iw/i40iw_hmc.c b/drivers/infiniband/hw/i40iw/i40iw_hmc.c index 5484cbf55f0f..8bd72af9e099 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hmc.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hmc.c @@ -46,7 +46,7 @@ * i40iw_find_sd_index_limit - finds segment descriptor index limit * @hmc_info: pointer to the HMC configuration information structure * @type: type of HMC resources we're searching - * @index: starting index for the object + * @idx: starting index for the object * @cnt: number of objects we're trying to create * @sd_idx: pointer to return index of the segment descriptor in question * @sd_limit: pointer to return the maximum number of segment descriptors @@ -78,7 +78,7 @@ static inline void i40iw_find_sd_index_limit(struct i40iw_hmc_info *hmc_info, * @type: HMC resource type we're examining * @idx: starting index for the object * @cnt: number of objects we're trying to create - * @pd_index: pointer to return page descriptor index + * @pd_idx: pointer to return page descriptor index * @pd_limit: pointer to return page descriptor index limit * * Calculates the page descriptor index and index limit for the resource diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 56fdc161f6f8..d167ac10c751 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -165,7 +165,7 @@ static void i40iw_cqp_ce_handler(struct i40iw_device *iwdev, struct i40iw_sc_cq /** * i40iw_iwarp_ce_handler - handle iwarp completions * @iwdev: iwarp device - * @iwcp: iwarp cq receiving event + * @iwcq: iwarp cq receiving event */ static void i40iw_iwarp_ce_handler(struct i40iw_device *iwdev, struct i40iw_sc_cq *iwcq) @@ -519,6 +519,7 @@ enum i40iw_status_code i40iw_manage_apbvt(struct i40iw_device *iwdev, * @iwdev: iwarp device * @mac_addr: mac address ptr * @ip_addr: ip addr for arp cache + * @ipv4: flag indicating IPv4 when true * @action: add, delete or modify */ void i40iw_manage_arp_cache(struct i40iw_device *iwdev, @@ -581,7 +582,6 @@ static void i40iw_send_syn_cqp_callback(struct i40iw_cqp_request *cqp_request, u * @mtype: type of qhash * @cmnode: cmnode associated with connection * @wait: wait for completion - * @user_pri:user pri of the connection */ enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev, struct i40iw_cm_info *cminfo, diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 584932d3cc44..ab4cb11950dc 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -186,7 +186,7 @@ static void i40iw_enable_intr(struct i40iw_sc_dev *dev, u32 msix_id) /** * i40iw_dpc - tasklet for aeq and ceq 0 - * @data: iwarp device + * @t: Timer context to fetch pointer to iwarp device */ static void i40iw_dpc(struct tasklet_struct *t) { @@ -200,7 +200,7 @@ static void i40iw_dpc(struct tasklet_struct *t) /** * i40iw_ceq_dpc - dpc handler for CEQ - * @data: data points to CEQ + * @t: Timer context to fetch pointer to CEQ data */ static void i40iw_ceq_dpc(struct tasklet_struct *t) { @@ -227,7 +227,7 @@ static irqreturn_t i40iw_irq_handler(int irq, void *data) /** * i40iw_destroy_cqp - destroy control qp * @iwdev: iwarp device - * @create_done: 1 if cqp create poll was success + * @free_hwcqp: 1 if CQP should be destroyed * * Issue destroy cqp request and 
* free the resources associated with the cqp @@ -253,7 +253,7 @@ static void i40iw_destroy_cqp(struct i40iw_device *iwdev, bool free_hwcqp) /** * i40iw_disable_irqs - disable device interrupts * @dev: hardware control device structure - * @msic_vec: msix vector to disable irq + * @msix_vec: msix vector to disable irq * @dev_id: parameter to pass to free_irq (used during irq setup) * * The function is called when destroying aeq/ceq @@ -394,8 +394,9 @@ static enum i40iw_hmc_rsrc_type iw_hmc_obj_types[] = { /** * i40iw_close_hmc_objects_type - delete hmc objects of a given type - * @iwdev: iwarp device + * @dev: iwarp device * @obj_type: the hmc object type to be deleted + * @hmc_info: pointer to the HMC configuration information * @is_pf: true if the function is PF otherwise false * @reset: true if called before reset */ @@ -437,6 +438,7 @@ static void i40iw_del_hmc_objects(struct i40iw_sc_dev *dev, /** * i40iw_ceq_handler - interrupt handler for ceq + * @irq: interrupt request number * @data: ceq pointer */ static irqreturn_t i40iw_ceq_handler(int irq, void *data) @@ -1777,6 +1779,7 @@ static void i40iw_l2param_change(struct i40e_info *ldev, struct i40e_client *cli /** * i40iw_close - client interface operation close for iwarp/uda device * @ldev: lan device information + * @reset: true if called before reset * @client: client to close * * Called by the lan driver during the processing of client unregister diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.c b/drivers/infiniband/hw/i40iw/i40iw_pble.c index 5f97643e22e5..53e5cd1a2bd6 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_pble.c +++ b/drivers/infiniband/hw/i40iw/i40iw_pble.c @@ -54,6 +54,7 @@ static void i40iw_free_vmalloc_mem(struct i40iw_hw *hw, struct i40iw_chunk *chun /** * i40iw_destroy_pble_pool - destroy pool during module unload + * @dev: i40iw_sc_dev struct * @pble_rsrc: pble resources */ void i40iw_destroy_pble_pool(struct i40iw_sc_dev *dev, struct i40iw_hmc_pble_rsrc *pble_rsrc) @@ -112,8 +113,8 @@ enum i40iw_status_code i40iw_hmc_init_pble(struct i40iw_sc_dev *dev, /** * get_sd_pd_idx - Returns sd index, pd index and rel_pd_idx from fpm address - * @ pble_rsrc: structure containing fpm address - * @ idx: where to return indexes + * @pble_rsrc: structure containing fpm address + * @idx: where to return indexes */ static inline void get_sd_pd_idx(struct i40iw_hmc_pble_rsrc *pble_rsrc, struct sd_pd_idx *idx) diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c index 924be4b03c9a..d1c8cc0a6236 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.c +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c @@ -511,7 +511,8 @@ static void i40iw_puda_qp_setctx(struct i40iw_puda_rsrc *rsrc) /** * i40iw_puda_qp_wqe - setup wqe for qp create - * @rsrc: resource for qp + * @dev: iwarp device + * @qp: resource for qp */ static enum i40iw_status_code i40iw_puda_qp_wqe(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp) { @@ -623,7 +624,8 @@ static enum i40iw_status_code i40iw_puda_qp_create(struct i40iw_puda_rsrc *rsrc) /** * i40iw_puda_cq_wqe - setup wqe for cq create - * @rsrc: resource for cq + * @dev: iwarp device + * @cq: cq to setup */ static enum i40iw_status_code i40iw_puda_cq_wqe(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq) { @@ -782,7 +784,7 @@ static void i40iw_puda_free_cq(struct i40iw_puda_rsrc *rsrc) /** * i40iw_puda_dele_resources - delete all resources during close - * @dev: iwarp device + * @vsi: pointer to vsi structure * @type: type of resource to dele * @reset: true if reset chip 
*/ @@ -876,7 +878,7 @@ static enum i40iw_status_code i40iw_puda_allocbufs(struct i40iw_puda_rsrc *rsrc, /** * i40iw_puda_create_rsrc - create resouce (ilq or ieq) - * @dev: iwarp device + * @vsi: pointer to vsi structure * @info: resource information */ enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_vsi *vsi, @@ -1121,6 +1123,7 @@ static void i40iw_ieq_compl_pfpdu(struct i40iw_puda_rsrc *ieq, /** * i40iw_ieq_create_pbufl - create buffer list for single fpdu + * @pfpdu: partial management per user qp * @rxlist: resource list for receive ieq buffes * @pbufl: temp. list for buffers for fpddu * @buf: first receive buffer @@ -1434,7 +1437,7 @@ static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq, /** * i40iw_ieq_receive - received exception buffer - * @dev: iwarp device + * @vsi: pointer to vsi structure * @buf: exception buffer received */ static void i40iw_ieq_receive(struct i40iw_sc_vsi *vsi, diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c index c3633c9944db..f521be16bf31 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_uk.c +++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c @@ -119,6 +119,8 @@ void i40iw_qp_post_wr(struct i40iw_qp_uk *qp) * @qp: hw qp ptr * @wqe_idx: return wqe index * @wqe_size: size of sq wqe + * @total_size: work request length + * @wr_id: work request id */ u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx, @@ -717,7 +719,6 @@ static enum i40iw_status_code i40iw_cq_post_entries(struct i40iw_cq_uk *cq, * i40iw_cq_poll_completion - get cq completion info * @cq: hw cq * @info: cq poll information returned - * @post_cq: update cq tail */ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq, struct i40iw_cq_poll_info *info) @@ -1051,7 +1052,7 @@ void i40iw_device_init_uk(struct i40iw_dev_uk *dev) /** * i40iw_clean_cq - clean cq entries - * @ queue completion context + * @queue: completion context * @cq: cq to clean */ void i40iw_clean_cq(void *queue, struct i40iw_cq_uk *cq) diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 644f8c641aa0..76f052b12c14 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -55,6 +55,7 @@ * i40iw_arp_table - manage arp table * @iwdev: iwarp device * @ip_addr: ip address for device + * @ipv4: flag indicating IPv4 when true * @mac_addr: mac address ptr * @action: modify, delete or add */ @@ -138,7 +139,7 @@ inline u32 i40iw_rd32(struct i40iw_hw *hw, u32 reg) /** * i40iw_inetaddr_event - system notifier for ipv4 addr events - * @notfier: not used + * @notifier: not used * @event: event for notifier * @ptr: if address */ @@ -214,7 +215,7 @@ int i40iw_inetaddr_event(struct notifier_block *notifier, /** * i40iw_inet6addr_event - system notifier for ipv6 addr events - * @notfier: not used + * @notifier: not used * @event: event for notifier * @ptr: if address */ @@ -265,7 +266,7 @@ int i40iw_inet6addr_event(struct notifier_block *notifier, /** * i40iw_net_event - system notifier for netevents - * @notfier: not used + * @notifier: not used * @event: event for notifier * @ptr: neighbor */ @@ -310,7 +311,7 @@ int i40iw_net_event(struct notifier_block *notifier, unsigned long event, void * /** * i40iw_netdevice_event - system notifier for netdev events - * @notfier: not used + * @notifier: not used * @event: event for notifier * @ptr: netdev */ @@ -652,6 +653,7 @@ struct ib_qp *i40iw_get_qp(struct ib_device *device, int qpn) * i40iw_debug_buf - 
print debug msg and buffer is mask set * @dev: hardware control device structure * @mask: mask to compare if to print debug buffer + * @desc: identifying string * @buf: points buffer addr * @size: saize of buffer to print */ @@ -784,7 +786,7 @@ enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw, /** * i40iw_cqp_sds_cmd - create cqp command for sd * @dev: hardware control device structure - * @sd_info: information for sd cqp + * @sdinfo: information for sd cqp * */ enum i40iw_status_code i40iw_cqp_sds_cmd(struct i40iw_sc_dev *dev, @@ -889,7 +891,7 @@ void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred) /** * i40iw_terminate_imeout - timeout happened - * @context: points to iwarp qp + * @t: points to iwarp qp */ static void i40iw_terminate_timeout(struct timer_list *t) { @@ -943,7 +945,7 @@ static void i40iw_cqp_generic_worker(struct work_struct *work) /** * i40iw_cqp_spawn_worker - spawn worket thread - * @iwdev: device struct pointer + * @dev: device struct pointer * @work_info: work request info * @iw_vf_idx: virtual function index */ @@ -1048,7 +1050,7 @@ enum i40iw_status_code i40iw_cqp_manage_hmc_fcn_cmd(struct i40iw_sc_dev *dev, /** * i40iw_cqp_query_fpm_values_cmd - send cqp command for fpm - * @iwdev: function device struct + * @dev: function device struct * @values_mem: buffer for fpm * @hmc_fn_id: function id for fpm */ @@ -1114,7 +1116,7 @@ enum i40iw_status_code i40iw_cqp_commit_fpm_values_cmd(struct i40iw_sc_dev *dev, /** * i40iw_vf_wait_vchnl_resp - wait for channel msg - * @iwdev: function's device struct + * @dev: function's device struct */ enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev) { @@ -1461,7 +1463,7 @@ enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_in /** * i40iw_hw_stats_timeout - Stats timer-handler which updates all HW stats - * @vsi: pointer to the vsi structure + * @t: Timer context containing pointer to the vsi structure */ static void i40iw_hw_stats_timeout(struct timer_list *t) { diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 65aedfe57e77..f18d146a6079 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -265,9 +265,7 @@ static struct i40iw_pbl *i40iw_get_pbl(unsigned long va, /** * i40iw_free_qp_resources - free up memory resources for qp - * @iwdev: iwarp device * @iwqp: qp ptr (user or kernel) - * @qp_num: qp number assigned */ void i40iw_free_qp_resources(struct i40iw_qp *iwqp) { @@ -302,6 +300,7 @@ static void i40iw_clean_cqes(struct i40iw_qp *iwqp, struct i40iw_cq *iwcq) /** * i40iw_destroy_qp - destroy qp * @ibqp: qp's ib pointer also to get to device's qp address + * @udata: user data */ static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { @@ -338,8 +337,8 @@ static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) /** * i40iw_setup_virt_qp - setup for allocation of virtual qp - * @dev: iwarp device - * @qp: qp ptr + * @iwdev: iwarp device + * @iwqp: qp ptr * @init_info: initialize info to return */ static int i40iw_setup_virt_qp(struct i40iw_device *iwdev, @@ -1241,7 +1240,7 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr, * i40iw_check_mem_contiguous - check if pbls stored in arr are contiguous * @arr: lvl1 pbl array * @npages: page count - * pg_size: page size + * @pg_size: page size * */ static bool i40iw_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size) @@ -1258,7 +1257,7 @@ static bool 
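Several of the corrected timer descriptions above ("@t: Timer context ...") reflect callbacks that receive only the embedded struct timer_list and recover their device structure via from_timer(), which is container_of() underneath. A stand-alone sketch of that recovery step, with invented toy types in place of the i40iw structures:

	#include <stddef.h>
	#include <stdio.h>

	#define toy_container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct toy_timer { int expires; };

	struct toy_vsi {
		int id;
		struct toy_timer stats_timer;	/* embedded member */
	};

	/* the callback only gets the embedded timer, not the enclosing struct */
	static void toy_stats_timeout(struct toy_timer *t)
	{
		struct toy_vsi *vsi = toy_container_of(t, struct toy_vsi, stats_timer);

		printf("stats timeout for vsi %d\n", vsi->id);
	}

	int main(void)
	{
		struct toy_vsi vsi = { .id = 7 };

		toy_stats_timeout(&vsi.stats_timer);
		return 0;
	}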
i40iw_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size) /** * i40iw_check_mr_contiguous - check if MR is physically contiguous * @palloc: pbl allocation struct - * pg_size: page size + * @pg_size: page size */ static bool i40iw_check_mr_contiguous(struct i40iw_pble_alloc *palloc, u32 pg_size) { @@ -1533,6 +1532,7 @@ static int i40iw_set_page(struct ib_mr *ibmr, u64 addr) * @ibmr: ib mem to access iwarp mr pointer * @sg: scatter gather list for fmr * @sg_nents: number of sg pages + * @sg_offset: scatter gather offset */ static int i40iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset) @@ -1881,6 +1881,7 @@ static void i40iw_del_memlist(struct i40iw_mr *iwmr, /** * i40iw_dereg_mr - deregister mr * @ib_mr: mr ptr for dereg + * @udata: user data */ static int i40iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) { @@ -1945,7 +1946,7 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) return 0; } -/** +/* * hw_rev_show */ static ssize_t hw_rev_show(struct device *dev, @@ -1959,7 +1960,7 @@ static ssize_t hw_rev_show(struct device *dev, } static DEVICE_ATTR_RO(hw_rev); -/** +/* * hca_type_show */ static ssize_t hca_type_show(struct device *dev, @@ -1969,7 +1970,7 @@ static ssize_t hca_type_show(struct device *dev, } static DEVICE_ATTR_RO(hca_type); -/** +/* * board_id_show */ static ssize_t board_id_show(struct device *dev, diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c index 48fd327f876b..aca9061688ae 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c @@ -119,7 +119,7 @@ static enum i40iw_status_code vchnl_vf_send_get_pe_stats_req(struct i40iw_sc_dev return ret_code; } -/** +/* * vchnl_vf_send_add_hmc_objs_req - Add HMC objects * @dev: IWARP device pointer * @vchnl_req: Virtual channel message request pointer @@ -158,9 +158,9 @@ static enum i40iw_status_code vchnl_vf_send_add_hmc_objs_req(struct i40iw_sc_dev * vchnl_vf_send_del_hmc_objs_req - del HMC objects * @dev: IWARP device pointer * @vchnl_req: Virtual channel message request pointer - * @ rsrc_type - resource type to delete - * @ start_index - starting index for resource - * @ rsrc_count - number of resource type to delete + * @rsrc_type: resource type to delete + * @start_index: starting index for resource + * @rsrc_count: number of resource type to delete */ static enum i40iw_status_code vchnl_vf_send_del_hmc_objs_req(struct i40iw_sc_dev *dev, struct i40iw_virtchnl_req *vchnl_req, @@ -222,6 +222,7 @@ static void vchnl_pf_send_get_ver_resp(struct i40iw_sc_dev *dev, * @dev: IWARP device pointer * @vf_id: Virtual function ID associated with the message * @vchnl_msg: Virtual channel message buffer pointer + * @hmc_fcn: HMC function index pointer */ static void vchnl_pf_send_get_hmc_fcn_resp(struct i40iw_sc_dev *dev, u32 vf_id, @@ -276,6 +277,7 @@ static void vchnl_pf_send_get_pe_stats_resp(struct i40iw_sc_dev *dev, * @dev: IWARP device pointer * @vf_id: Virtual function ID associated with the message * @vchnl_msg: Virtual channel message buffer pointer + * @op_ret_code: I40IW_ERR_* status code */ static void vchnl_pf_send_error_resp(struct i40iw_sc_dev *dev, u32 vf_id, struct i40iw_virtchnl_op_buf *vchnl_msg, @@ -297,8 +299,9 @@ static void vchnl_pf_send_error_resp(struct i40iw_sc_dev *dev, u32 vf_id, /** * pf_cqp_get_hmc_fcn_callback - Callback for Get HMC Fcn - * @cqp_req_param: CQP Request param value - * @not_used: unused CQP callback parameter 
+ * @dev: IWARP device pointer + * @callback_param: unused CQP callback parameter + * @cqe_info: CQE information pointer */ static void pf_cqp_get_hmc_fcn_callback(struct i40iw_sc_dev *dev, void *callback_param, struct i40iw_ccq_cqe_info *cqe_info) @@ -331,7 +334,7 @@ static void pf_cqp_get_hmc_fcn_callback(struct i40iw_sc_dev *dev, void *callback /** * pf_add_hmc_obj - Callback for Add HMC Object - * @vf_dev: pointer to the VF Device + * @work_vf_dev: pointer to the VF Device */ static void pf_add_hmc_obj_callback(void *work_vf_dev) { @@ -404,7 +407,7 @@ del_out: /** * i40iw_vf_init_pestat - Initialize stats for VF - * @devL pointer to the VF Device + * @dev: pointer to the VF Device * @stats: Statistics structure pointer * @index: Stats index */ diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index e3cd402c079a..f26a0d920842 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1699,7 +1699,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, struct mlx4_dev *dev = (to_mdev(qp->device))->dev; int is_bonded = mlx4_is_bonded(dev); - if (flow_attr->port < 1 || flow_attr->port > qp->device->phys_port_cnt) + if (!rdma_is_port_valid(qp->device, flow_attr->port)) return ERR_PTR(-EINVAL); if (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP) diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index 1b5891130aab..24ee79aa2122 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -798,7 +798,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device) int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev) { - int i; + unsigned int i; int ret = 0; if (!mlx4_is_master(dev->dev)) @@ -817,7 +817,7 @@ int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev) goto err_ports; } - for (i = 1; i <= dev->ib_dev.phys_port_cnt; ++i) { + rdma_for_each_port(&dev->ib_dev, i) { ret = add_port_entries(dev, i); if (ret) goto err_add_entries; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 819c142857d6..ebc2a4355fa5 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -94,13 +94,13 @@ struct devx_umem { struct mlx5_core_dev *mdev; struct ib_umem *umem; u32 dinlen; - u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)]; + u32 dinbox[MLX5_ST_SZ_DW(destroy_umem_in)]; }; struct devx_umem_reg_cmd { void *in; u32 inlen; - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + u32 out[MLX5_ST_SZ_DW(create_umem_out)]; }; static struct mlx5_ib_ucontext * @@ -111,8 +111,8 @@ devx_ufile2uctx(const struct uverbs_attr_bundle *attrs) int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) { - u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {}; + u32 out[MLX5_ST_SZ_DW(create_uctx_out)] = {}; void *uctx; int err; u16 uid; @@ -138,14 +138,14 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) if (err) return err; - uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + uid = MLX5_GET(create_uctx_out, out, uid); return uid; } void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) { - u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_uctx_out)] = {}; MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX); MLX5_SET(destroy_uctx_in, in, uid, 
uid); @@ -288,6 +288,80 @@ static u64 get_enc_obj_id(u32 opcode, u32 obj_id) return ((u64)opcode << 32) | obj_id; } +static u32 devx_get_created_obj_id(const void *in, const void *out, u16 opcode) +{ + switch (opcode) { + case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: + return MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + case MLX5_CMD_OP_CREATE_UMEM: + return MLX5_GET(create_umem_out, out, umem_id); + case MLX5_CMD_OP_CREATE_MKEY: + return MLX5_GET(create_mkey_out, out, mkey_index); + case MLX5_CMD_OP_CREATE_CQ: + return MLX5_GET(create_cq_out, out, cqn); + case MLX5_CMD_OP_ALLOC_PD: + return MLX5_GET(alloc_pd_out, out, pd); + case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: + return MLX5_GET(alloc_transport_domain_out, out, + transport_domain); + case MLX5_CMD_OP_CREATE_RMP: + return MLX5_GET(create_rmp_out, out, rmpn); + case MLX5_CMD_OP_CREATE_SQ: + return MLX5_GET(create_sq_out, out, sqn); + case MLX5_CMD_OP_CREATE_RQ: + return MLX5_GET(create_rq_out, out, rqn); + case MLX5_CMD_OP_CREATE_RQT: + return MLX5_GET(create_rqt_out, out, rqtn); + case MLX5_CMD_OP_CREATE_TIR: + return MLX5_GET(create_tir_out, out, tirn); + case MLX5_CMD_OP_CREATE_TIS: + return MLX5_GET(create_tis_out, out, tisn); + case MLX5_CMD_OP_ALLOC_Q_COUNTER: + return MLX5_GET(alloc_q_counter_out, out, counter_set_id); + case MLX5_CMD_OP_CREATE_FLOW_TABLE: + return MLX5_GET(create_flow_table_out, out, table_id); + case MLX5_CMD_OP_CREATE_FLOW_GROUP: + return MLX5_GET(create_flow_group_out, out, group_id); + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + return MLX5_GET(set_fte_in, in, flow_index); + case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: + return MLX5_GET(alloc_flow_counter_out, out, flow_counter_id); + case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT: + return MLX5_GET(alloc_packet_reformat_context_out, out, + packet_reformat_id); + case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: + return MLX5_GET(alloc_modify_header_context_out, out, + modify_header_id); + case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: + return MLX5_GET(create_scheduling_element_out, out, + scheduling_element_id); + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + return MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port); + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + return MLX5_GET(set_l2_table_entry_in, in, table_index); + case MLX5_CMD_OP_CREATE_QP: + return MLX5_GET(create_qp_out, out, qpn); + case MLX5_CMD_OP_CREATE_SRQ: + return MLX5_GET(create_srq_out, out, srqn); + case MLX5_CMD_OP_CREATE_XRC_SRQ: + return MLX5_GET(create_xrc_srq_out, out, xrc_srqn); + case MLX5_CMD_OP_CREATE_DCT: + return MLX5_GET(create_dct_out, out, dctn); + case MLX5_CMD_OP_CREATE_XRQ: + return MLX5_GET(create_xrq_out, out, xrqn); + case MLX5_CMD_OP_ATTACH_TO_MCG: + return MLX5_GET(attach_to_mcg_in, in, qpn); + case MLX5_CMD_OP_ALLOC_XRCD: + return MLX5_GET(alloc_xrcd_out, out, xrcd); + case MLX5_CMD_OP_CREATE_PSV: + return MLX5_GET(create_psv_out, out, psv0_index); + default: + /* The entry must match to one of the devx_is_obj_create_cmd */ + WARN_ON(true); + return 0; + } +} + static u64 devx_get_obj_id(const void *in) { u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); @@ -399,8 +473,8 @@ static u64 devx_get_obj_id(const void *in) break; case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT: obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT, - MLX5_GET(general_obj_in_cmd_hdr, in, - obj_id)); + MLX5_GET(query_modify_header_context_in, + in, modify_header_id)); break; case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT, @@ -1019,63 +1093,76 @@ 
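devx_get_created_obj_id() above centralises where the newly created object's id lives in each command's output layout, keyed by the create opcode. The same dispatch idea in a compilable toy form; the opcodes and output layouts here are invented, not mlx5 ifc structures:

	#include <stdint.h>
	#include <stdio.h>

	enum toy_opcode { TOY_CREATE_CQ, TOY_CREATE_QP, TOY_ALLOC_PD };

	struct toy_create_cq_out { uint32_t status; uint32_t cqn; };
	struct toy_create_qp_out { uint32_t status; uint32_t reserved; uint32_t qpn; };
	struct toy_alloc_pd_out  { uint32_t status; uint32_t pd; };

	/* the id of a freshly created object sits at a command-specific spot
	 * in the reply, so one switch on the create opcode picks the field */
	static uint32_t toy_get_created_obj_id(const void *out, enum toy_opcode op)
	{
		switch (op) {
		case TOY_CREATE_CQ:
			return ((const struct toy_create_cq_out *)out)->cqn;
		case TOY_CREATE_QP:
			return ((const struct toy_create_qp_out *)out)->qpn;
		case TOY_ALLOC_PD:
			return ((const struct toy_alloc_pd_out *)out)->pd;
		}
		return 0;
	}

	int main(void)
	{
		struct toy_create_qp_out out = { .qpn = 0x2a };

		printf("qpn=0x%x\n",
		       (unsigned int)toy_get_created_obj_id(&out, TOY_CREATE_QP));
		return 0;
	}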
static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, u32 *dinlen, u32 *obj_id) { - u16 obj_type = MLX5_GET(general_obj_in_cmd_hdr, in, obj_type); + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid); - *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + *obj_id = devx_get_created_obj_id(in, out, opcode); *dinlen = MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr); - - MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id); MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid); - switch (MLX5_GET(general_obj_in_cmd_hdr, in, opcode)) { + switch (opcode) { case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type); + MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, + MLX5_GET(general_obj_in_cmd_hdr, in, obj_type)); break; case MLX5_CMD_OP_CREATE_UMEM: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_umem_in, din, opcode, MLX5_CMD_OP_DESTROY_UMEM); + MLX5_SET(destroy_umem_in, din, umem_id, *obj_id); break; case MLX5_CMD_OP_CREATE_MKEY: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY); + MLX5_SET(destroy_mkey_in, din, opcode, + MLX5_CMD_OP_DESTROY_MKEY); + MLX5_SET(destroy_mkey_in, in, mkey_index, *obj_id); break; case MLX5_CMD_OP_CREATE_CQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_CQ); + MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ); + MLX5_SET(destroy_cq_in, din, cqn, *obj_id); break; case MLX5_CMD_OP_ALLOC_PD: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_PD); + MLX5_SET(dealloc_pd_in, din, opcode, MLX5_CMD_OP_DEALLOC_PD); + MLX5_SET(dealloc_pd_in, din, pd, *obj_id); break; case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(dealloc_transport_domain_in, din, opcode, MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); + MLX5_SET(dealloc_transport_domain_in, din, transport_domain, + *obj_id); break; case MLX5_CMD_OP_CREATE_RMP: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RMP); + MLX5_SET(destroy_rmp_in, din, opcode, MLX5_CMD_OP_DESTROY_RMP); + MLX5_SET(destroy_rmp_in, din, rmpn, *obj_id); break; case MLX5_CMD_OP_CREATE_SQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SQ); + MLX5_SET(destroy_sq_in, din, opcode, MLX5_CMD_OP_DESTROY_SQ); + MLX5_SET(destroy_sq_in, din, sqn, *obj_id); break; case MLX5_CMD_OP_CREATE_RQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQ); + MLX5_SET(destroy_rq_in, din, opcode, MLX5_CMD_OP_DESTROY_RQ); + MLX5_SET(destroy_rq_in, din, rqn, *obj_id); break; case MLX5_CMD_OP_CREATE_RQT: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT); + MLX5_SET(destroy_rqt_in, din, opcode, MLX5_CMD_OP_DESTROY_RQT); + MLX5_SET(destroy_rqt_in, din, rqtn, *obj_id); break; case MLX5_CMD_OP_CREATE_TIR: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIR); + MLX5_SET(destroy_tir_in, din, opcode, MLX5_CMD_OP_DESTROY_TIR); + MLX5_SET(destroy_tir_in, din, tirn, *obj_id); break; case MLX5_CMD_OP_CREATE_TIS: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, din, opcode, MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, din, tisn, *obj_id); break; case MLX5_CMD_OP_ALLOC_Q_COUNTER: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + 
MLX5_SET(dealloc_q_counter_in, din, opcode, MLX5_CMD_OP_DEALLOC_Q_COUNTER); + MLX5_SET(dealloc_q_counter_in, din, counter_set_id, *obj_id); break; case MLX5_CMD_OP_CREATE_FLOW_TABLE: *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in); - *obj_id = MLX5_GET(create_flow_table_out, out, table_id); MLX5_SET(destroy_flow_table_in, din, other_vport, MLX5_GET(create_flow_table_in, in, other_vport)); MLX5_SET(destroy_flow_table_in, din, vport_number, @@ -1083,12 +1170,11 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, MLX5_SET(destroy_flow_table_in, din, table_type, MLX5_GET(create_flow_table_in, in, table_type)); MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_flow_table_in, din, opcode, MLX5_CMD_OP_DESTROY_FLOW_TABLE); break; case MLX5_CMD_OP_CREATE_FLOW_GROUP: *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in); - *obj_id = MLX5_GET(create_flow_group_out, out, group_id); MLX5_SET(destroy_flow_group_in, din, other_vport, MLX5_GET(create_flow_group_in, in, other_vport)); MLX5_SET(destroy_flow_group_in, din, vport_number, @@ -1098,12 +1184,11 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, MLX5_SET(destroy_flow_group_in, din, table_id, MLX5_GET(create_flow_group_in, in, table_id)); MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_flow_group_in, din, opcode, MLX5_CMD_OP_DESTROY_FLOW_GROUP); break; case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: *dinlen = MLX5_ST_SZ_BYTES(delete_fte_in); - *obj_id = MLX5_GET(set_fte_in, in, flow_index); MLX5_SET(delete_fte_in, din, other_vport, MLX5_GET(set_fte_in, in, other_vport)); MLX5_SET(delete_fte_in, din, vport_number, @@ -1113,63 +1198,70 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, MLX5_SET(delete_fte_in, din, table_id, MLX5_GET(set_fte_in, in, table_id)); MLX5_SET(delete_fte_in, din, flow_index, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(delete_fte_in, din, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); break; case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(dealloc_flow_counter_in, din, opcode, MLX5_CMD_OP_DEALLOC_FLOW_COUNTER); + MLX5_SET(dealloc_flow_counter_in, din, flow_counter_id, + *obj_id); break; case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(dealloc_packet_reformat_context_in, din, opcode, MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_SET(dealloc_packet_reformat_context_in, din, + packet_reformat_id, *obj_id); break; case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(dealloc_modify_header_context_in, din, opcode, MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(dealloc_modify_header_context_in, din, + modify_header_id, *obj_id); break; case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: *dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in); - *obj_id = MLX5_GET(create_scheduling_element_out, out, - scheduling_element_id); MLX5_SET(destroy_scheduling_element_in, din, scheduling_hierarchy, MLX5_GET(create_scheduling_element_in, in, scheduling_hierarchy)); MLX5_SET(destroy_scheduling_element_in, din, scheduling_element_id, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_scheduling_element_in, din, opcode, MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT); break; case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: *dinlen = 
MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in); - *obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port); MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(delete_vxlan_udp_dport_in, din, opcode, MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT); break; case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: *dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in); - *obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index); MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(delete_l2_table_entry_in, din, opcode, MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); break; case MLX5_CMD_OP_CREATE_QP: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, din, qpn, *obj_id); break; case MLX5_CMD_OP_CREATE_SRQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SRQ); + MLX5_SET(destroy_srq_in, din, opcode, MLX5_CMD_OP_DESTROY_SRQ); + MLX5_SET(destroy_srq_in, din, srqn, *obj_id); break; case MLX5_CMD_OP_CREATE_XRC_SRQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_xrc_srq_in, din, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ); + MLX5_SET(destroy_xrc_srq_in, din, xrc_srqn, *obj_id); break; case MLX5_CMD_OP_CREATE_DCT: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_DCT); + MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT); + MLX5_SET(destroy_dct_in, din, dctn, *obj_id); break; case MLX5_CMD_OP_CREATE_XRQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_XRQ); + MLX5_SET(destroy_xrq_in, din, opcode, MLX5_CMD_OP_DESTROY_XRQ); + MLX5_SET(destroy_xrq_in, din, xrqn, *obj_id); break; case MLX5_CMD_OP_ATTACH_TO_MCG: *dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in); @@ -1178,16 +1270,19 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, multicast_gid), MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid), MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid)); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DETACH_FROM_MCG); + MLX5_SET(detach_from_mcg_in, din, opcode, + MLX5_CMD_OP_DETACH_FROM_MCG); + MLX5_SET(detach_from_mcg_in, din, qpn, *obj_id); break; case MLX5_CMD_OP_ALLOC_XRCD: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD); + MLX5_SET(dealloc_xrcd_in, din, opcode, + MLX5_CMD_OP_DEALLOC_XRCD); + MLX5_SET(dealloc_xrcd_in, din, xrcd, *obj_id); break; case MLX5_CMD_OP_CREATE_PSV: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_psv_in, din, opcode, MLX5_CMD_OP_DESTROY_PSV); - MLX5_SET(destroy_psv_in, din, psvn, - MLX5_GET(create_psv_out, out, psv0_index)); + MLX5_SET(destroy_psv_in, din, psvn, *obj_id); break; default: /* The entry must match to one of the devx_is_obj_create_cmd */ @@ -1215,9 +1310,9 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj, mkey->size = MLX5_GET64(mkc, mkc, len); mkey->pd = MLX5_GET(mkc, mkc, pd); devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size); + init_waitqueue_head(&mkey->wait); - return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mkey->key), mkey, - GFP_KERNEL)); + return mlx5r_store_odp_mkey(dev, mkey); } static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, @@ -1290,16 +1385,15 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, int ret; dev = mlx5_udata_to_mdev(&attrs->driver_udata); - if (obj->flags & 
DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY && + xa_erase(&obj->ib_dev->odp_mkeys, + mlx5_base_mkey(obj->devx_mr.mmkey.key))) /* * The pagefault_single_data_segment() does commands against * the mmkey, we must wait for that to stop before freeing the * mkey, as another allocation could get the same mkey #. */ - xa_erase(&obj->ib_dev->odp_mkeys, - mlx5_base_mkey(obj->devx_mr.mmkey.key)); - synchronize_srcu(&dev->odp_srcu); - } + mlx5r_deref_wait_odp_mkey(&obj->devx_mr.mmkey); if (obj->flags & DEVX_OBJ_FLAGS_DCT) ret = mlx5_core_destroy_dct(obj->ib_dev, &obj->core_dct); @@ -1345,6 +1439,16 @@ out: rcu_read_unlock(); } +static bool is_apu_thread_cq(struct mlx5_ib_dev *dev, const void *in) +{ + if (!MLX5_CAP_GEN(dev->mdev, apu) || + !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), + apu_thread_cq)) + return false; + + return true; +} + static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( struct uverbs_attr_bundle *attrs) { @@ -1398,7 +1502,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( obj->flags |= DEVX_OBJ_FLAGS_DCT; err = mlx5_core_create_dct(dev, &obj->core_dct, cmd_in, cmd_in_len, cmd_out, cmd_out_len); - } else if (opcode == MLX5_CMD_OP_CREATE_CQ) { + } else if (opcode == MLX5_CMD_OP_CREATE_CQ && + !is_apu_thread_cq(dev, cmd_in)) { obj->flags |= DEVX_OBJ_FLAGS_CQ; obj->core_cq.comp = devx_cq_comp; err = mlx5_core_create_cq(dev->mdev, &obj->core_cq, diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 9bb9bb058932..652c6ccf1881 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -48,7 +48,7 @@ static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num, if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED && in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) return true; - return dev->mdev->port_caps[port_num - 1].has_smi; + return dev->port_caps[port_num - 1].has_smi; } static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, @@ -279,7 +279,7 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; } -int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) +int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, unsigned int port) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; @@ -299,7 +299,7 @@ int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) packet_error = be16_to_cpu(out_mad->status); - dev->mdev->port_caps[port - 1].ext_port_cap = (!err && !packet_error) ? + dev->port_caps[port - 1].ext_port_cap = (!err && !packet_error) ? 
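The cleanup path above now unpublishes the indirect mkey from the xarray and calls mlx5r_deref_wait_odp_mkey(), i.e. it blocks until concurrent page-fault users have dropped their references instead of issuing synchronize_srcu(). A user-space sketch of that deref-and-wait idea using a counter plus a condition variable (build with -pthread; all names are invented, this is not the mlx5 implementation):

	#include <pthread.h>
	#include <stdio.h>

	struct toy_mkey {
		int refs;
		pthread_mutex_t lock;
		pthread_cond_t drained;
	};

	static void toy_mkey_put(struct toy_mkey *mk)
	{
		pthread_mutex_lock(&mk->lock);
		if (--mk->refs == 0)
			pthread_cond_broadcast(&mk->drained);
		pthread_mutex_unlock(&mk->lock);
	}

	/* destroy path: drop our own reference, then wait for everyone else */
	static void toy_mkey_put_and_wait(struct toy_mkey *mk)
	{
		pthread_mutex_lock(&mk->lock);
		mk->refs--;
		while (mk->refs > 0)
			pthread_cond_wait(&mk->drained, &mk->lock);
		pthread_mutex_unlock(&mk->lock);
	}

	static void *worker(void *arg)
	{
		toy_mkey_put(arg);		/* a concurrent user finishes */
		return NULL;
	}

	int main(void)
	{
		struct toy_mkey mk = { .refs = 2,
				       .lock = PTHREAD_MUTEX_INITIALIZER,
				       .drained = PTHREAD_COND_INITIALIZER };
		pthread_t t;

		pthread_create(&t, NULL, worker, &mk);
		toy_mkey_put_and_wait(&mk);	/* returns once the worker has put */
		pthread_join(&t, NULL);
		printf("mkey drained\n");
		return 0;
	}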
MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0; out: @@ -308,8 +308,8 @@ out: return err; } -int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, - struct ib_smp *out_mad) +static int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, + struct ib_smp *out_mad) { struct ib_smp *in_mad = NULL; int err = -ENOMEM; @@ -549,7 +549,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20)); props->gid_tbl_len = out_mad->data[50]; props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg); - props->pkey_tbl_len = mdev->port_caps[port - 1].pkey_table_len; + props->pkey_tbl_len = dev->pkey_table_len; props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46)); props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; @@ -589,7 +589,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, /* If reported active speed is QDR, check if is FDR-10 */ if (props->active_speed == 4) { - if (mdev->port_caps[port - 1].ext_port_cap & + if (dev->port_caps[port - 1].ext_port_cap & MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) { init_query_mad(in_mad); in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index aabdc07e4753..0d69a697d75f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2020, Intel Corporation. All rights reserved. */ #include <linux/debugfs.h> @@ -461,7 +462,6 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, struct net_device *ndev, *upper; enum ib_mtu ndev_ib_mtu; bool put_mdev = true; - u16 qkey_viol_cntr; u32 eth_prot_oper; u8 mdev_port_num; bool ext; @@ -499,20 +499,22 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, translate_eth_proto_oper(eth_prot_oper, &props->active_speed, &props->active_width, ext); - props->port_cap_flags |= IB_PORT_CM_SUP; - props->ip_gids = true; + if (!dev->is_rep && mlx5_is_roce_enabled(mdev)) { + u16 qkey_viol_cntr; - props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev, - roce_address_table_size); + props->port_cap_flags |= IB_PORT_CM_SUP; + props->ip_gids = true; + props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev, + roce_address_table_size); + mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr); + props->qkey_viol_cntr = qkey_viol_cntr; + } props->max_mtu = IB_MTU_4096; props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg); props->pkey_tbl_len = 1; props->state = IB_PORT_DOWN; props->phys_state = IB_PORT_PHYS_STATE_DISABLED; - mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr); - props->qkey_viol_cntr = qkey_viol_cntr; - /* If this is a stub query for an unaffiliated port stop here */ if (!put_mdev) goto out; @@ -815,9 +817,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (err) return err; - err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys); - if (err) - return err; + props->max_pkeys = dev->pkey_table_len; err = mlx5_query_vendor_id(ibdev, &props->vendor_id); if (err) @@ -1384,19 +1384,17 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { - int ret; + return mlx5_query_port_roce(ibdev, port, props); +} - /* Only link layer == ethernet 
is valid for representors - * and we always use port 1 +static int mlx5_ib_rep_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + /* Default special Pkey for representor device port as per the + * IB specification 1.3 section 10.9.1.2. */ - ret = mlx5_query_port_roce(ibdev, port, props); - if (ret || !props) - return ret; - - /* We don't support GIDS */ - props->gid_tbl_len = 0; - - return ret; + *pkey = 0xffff; + return 0; } static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index, @@ -2935,8 +2933,8 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev) int err; int port; - for (port = 1; port <= ARRAY_SIZE(dev->mdev->port_caps); port++) { - dev->mdev->port_caps[port - 1].has_smi = false; + for (port = 1; port <= ARRAY_SIZE(dev->port_caps); port++) { + dev->port_caps[port - 1].has_smi = false; if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) { if (MLX5_CAP_GEN(dev->mdev, ib_virt)) { @@ -2948,10 +2946,10 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev) port, err); return err; } - dev->mdev->port_caps[port - 1].has_smi = + dev->port_caps[port - 1].has_smi = vport_ctx.has_smi; } else { - dev->mdev->port_caps[port - 1].has_smi = true; + dev->port_caps[port - 1].has_smi = true; } } } @@ -2960,63 +2958,12 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev) static void get_ext_port_caps(struct mlx5_ib_dev *dev) { - int port; + unsigned int port; - for (port = 1; port <= dev->num_ports; port++) + rdma_for_each_port (&dev->ib_dev, port) mlx5_query_ext_port_caps(dev, port); } -static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port) -{ - struct ib_device_attr *dprops = NULL; - struct ib_port_attr *pprops = NULL; - int err = -ENOMEM; - - pprops = kzalloc(sizeof(*pprops), GFP_KERNEL); - if (!pprops) - goto out; - - dprops = kmalloc(sizeof(*dprops), GFP_KERNEL); - if (!dprops) - goto out; - - err = mlx5_ib_query_device(&dev->ib_dev, dprops, NULL); - if (err) { - mlx5_ib_warn(dev, "query_device failed %d\n", err); - goto out; - } - - err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); - if (err) { - mlx5_ib_warn(dev, "query_port %d failed %d\n", - port, err); - goto out; - } - - dev->mdev->port_caps[port - 1].pkey_table_len = - dprops->max_pkeys; - dev->mdev->port_caps[port - 1].gid_table_len = - pprops->gid_tbl_len; - mlx5_ib_dbg(dev, "port %d: pkey_table_len %d, gid_table_len %d\n", - port, dprops->max_pkeys, pprops->gid_tbl_len); - -out: - kfree(pprops); - kfree(dprops); - - return err; -} - -static int get_port_caps(struct mlx5_ib_dev *dev, u8 port) -{ - /* For representors use port 1, is this is the only native - * port - */ - if (dev->is_rep) - return __get_port_caps(dev, 1); - return __get_port_caps(dev, port); -} - static u8 mlx5_get_umr_fence(u8 umr_fence_cap) { switch (umr_fence_cap) { @@ -3488,10 +3435,6 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, if (err) goto unbind; - err = get_port_caps(ibdev, mlx5_core_native_port_num(mpi->mdev)); - if (err) - goto unbind; - err = mlx5_add_netdev_notifier(ibdev, port_num); if (err) { mlx5_ib_err(ibdev, "failed adding netdev notifier for port %u\n", @@ -3569,11 +3512,9 @@ static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev) break; } } - if (!bound) { - get_port_caps(dev, i + 1); + if (!bound) mlx5_ib_dbg(dev, "no free port found for port %d\n", i + 1); - } } list_add_tail(&dev->ib_dev_list, &mlx5_ib_dev_list); @@ -3926,8 +3867,7 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); 
WARN_ON(!xa_empty(&dev->odp_mkeys)); - cleanup_srcu_struct(&dev->odp_srcu); - + mutex_destroy(&dev->cap_mask_mutex); WARN_ON(!xa_empty(&dev->sig_mrs)); WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES)); } @@ -3938,6 +3878,12 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) int err; int i; + dev->ib_dev.node_type = RDMA_NODE_IB_CA; + dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; + dev->ib_dev.phys_port_cnt = dev->num_ports; + dev->ib_dev.dev.parent = mdev->device; + dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES; + for (i = 0; i < dev->num_ports; i++) { spin_lock_init(&dev->port[i].mp.mpi_lock); rwlock_init(&dev->port[i].roce.netdev_lock); @@ -3956,27 +3902,14 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) if (err) goto err_mp; - if (!mlx5_core_mp_enabled(mdev)) { - for (i = 1; i <= dev->num_ports; i++) { - err = get_port_caps(dev, i); - if (err) - break; - } - } else { - err = get_port_caps(dev, mlx5_core_native_port_num(mdev)); - } + err = mlx5_query_max_pkeys(&dev->ib_dev, &dev->pkey_table_len); if (err) goto err_mp; if (mlx5_use_mad_ifc(dev)) get_ext_port_caps(dev); - dev->ib_dev.node_type = RDMA_NODE_IB_CA; - dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; - dev->ib_dev.phys_port_cnt = dev->num_ports; dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev); - dev->ib_dev.dev.parent = mdev->device; - dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES; mutex_init(&dev->cap_mask_mutex); INIT_LIST_HEAD(&dev->qp_list); @@ -3987,17 +3920,11 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) spin_lock_init(&dev->dm.lock); dev->dm.dev = mdev; - - err = init_srcu_struct(&dev->odp_srcu); - if (err) - goto err_mp; - return 0; err_mp: mlx5_ib_cleanup_multiport_master(dev); - - return -ENOMEM; + return err; } static int mlx5_ib_enable_driver(struct ib_device *dev) @@ -4067,6 +3994,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .query_srq = mlx5_ib_query_srq, .query_ucontext = mlx5_ib_query_ucontext, .reg_user_mr = mlx5_ib_reg_user_mr, + .reg_user_mr_dmabuf = mlx5_ib_reg_user_mr_dmabuf, .req_notify_cq = mlx5_ib_arm_cq, .rereg_user_mr = mlx5_ib_rereg_user_mr, .resize_cq = mlx5_ib_resize_cq, @@ -4207,6 +4135,7 @@ static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev) static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = { .get_port_immutable = mlx5_port_rep_immutable, .query_port = mlx5_ib_rep_query_port, + .query_pkey = mlx5_ib_rep_query_pkey, }; static int mlx5_ib_stage_raw_eth_non_default_cb(struct mlx5_ib_dev *dev) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b0fdc1b08e06..88cc26e008fc 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2020, Intel Corporation. All rights reserved. 
*/ #ifndef MLX5_IB_H @@ -683,11 +684,8 @@ struct mlx5_ib_mr { u64 pi_iova; /* For ODP and implicit */ - atomic_t num_deferred_work; - wait_queue_head_t q_deferred_work; struct xarray implicit_children; union { - struct rcu_head rcu; struct list_head elm; struct work_struct work; } odp_destroy; @@ -703,6 +701,12 @@ static inline bool is_odp_mr(struct mlx5_ib_mr *mr) mr->umem->is_odp; } +static inline bool is_dmabuf_mr(struct mlx5_ib_mr *mr) +{ + return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem && + mr->umem->is_dmabuf; +} + struct mlx5_ib_mw { struct ib_mw ibmw; struct mlx5_core_mkey mmkey; @@ -1029,6 +1033,11 @@ struct mlx5_var_table { u64 num_var_hw_entries; }; +struct mlx5_port_caps { + bool has_smi; + u8 ext_port_cap; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; @@ -1056,11 +1065,6 @@ struct mlx5_ib_dev { u64 odp_max_size; struct mlx5_ib_pf_eq odp_pf_eq; - /* - * Sleepable RCU that prevents destruction of MRs while they are still - * being used by a page fault handler. - */ - struct srcu_struct odp_srcu; struct xarray odp_mkeys; u32 null_mkey; @@ -1089,6 +1093,8 @@ struct mlx5_ib_dev { struct mlx5_var_table var_table; struct xarray sig_mrs; + struct mlx5_port_caps port_caps[MLX5_MAX_PORTS]; + u16 pkey_table_len; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -1243,6 +1249,10 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); +struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start, + u64 length, u64 virt_addr, + int fd, int access_flags, + struct ib_udata *udata); int mlx5_ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, @@ -1253,11 +1263,13 @@ int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); int mlx5_ib_dealloc_mw(struct ib_mw *mw); int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int page_shift, int flags); +int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags); struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, struct ib_udata *udata, int access_flags); void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr); +void mlx5_ib_fence_dmabuf_mr(struct mlx5_ib_mr *mr); struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_pd *pd, struct ib_udata *udata); @@ -1279,9 +1291,7 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, size_t *out_mad_size, u16 *out_mad_pkey_index); int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); -int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); -int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, - struct ib_smp *out_mad); +int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, unsigned int port); int mlx5_query_mad_ifc_system_image_guid(struct ib_device *ibdev, __be64 *sys_image_guid); int mlx5_query_mad_ifc_max_pkeys(struct ib_device *ibdev, @@ -1345,6 +1355,7 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge); int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr); +int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ static inline void 
mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { @@ -1370,6 +1381,10 @@ static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr) { return -EOPNOTSUPP; } +static inline int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ extern const struct mmu_interval_notifier_ops mlx5_mn_ops; @@ -1576,6 +1591,29 @@ static inline bool mlx5_ib_can_reconfig_with_umr(struct mlx5_ib_dev *dev, return true; } +static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev, + struct mlx5_core_mkey *mmkey) +{ + refcount_set(&mmkey->usecount, 1); + + return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mmkey->key), + mmkey, GFP_KERNEL)); +} + +/* deref an mkey that can participate in ODP flow */ +static inline void mlx5r_deref_odp_mkey(struct mlx5_core_mkey *mmkey) +{ + if (refcount_dec_and_test(&mmkey->usecount)) + wake_up(&mmkey->wait); +} + +/* deref an mkey that can participate in ODP flow and wait for release */ +static inline void mlx5r_deref_wait_odp_mkey(struct mlx5_core_mkey *mmkey) +{ + mlx5r_deref_odp_mkey(mmkey); + wait_event(mmkey->wait, refcount_read(&mmkey->usecount) == 0); +} + int mlx5_ib_test_wc(struct mlx5_ib_dev *dev); static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 24f8d59a42ea..db05b0e0a8d7 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * Copyright (c) 2020, Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -36,6 +37,8 @@ #include <linux/debugfs.h> #include <linux/export.h> #include <linux/delay.h> +#include <linux/dma-buf.h> +#include <linux/dma-resv.h> #include <rdma/ib_umem.h> #include <rdma/ib_umem_odp.h> #include <rdma/ib_verbs.h> @@ -155,6 +158,7 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) mr->mmkey.type = MLX5_MKEY_MR; mr->mmkey.key |= mlx5_idx_to_mkey( MLX5_GET(create_mkey_out, mr->out, mkey_index)); + init_waitqueue_head(&mr->mmkey.wait); WRITE_ONCE(dev->cache.last_add, jiffies); @@ -935,6 +939,17 @@ static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, mr->access_flags = access_flags; } +static unsigned int mlx5_umem_dmabuf_default_pgsz(struct ib_umem *umem, + u64 iova) +{ + /* + * The alignment of iova has already been checked upon entering + * UVERBS_METHOD_REG_DMABUF_MR + */ + umem->iova = iova; + return PAGE_SIZE; +} + static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, struct ib_umem *umem, u64 iova, int access_flags) @@ -944,7 +959,11 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, struct mlx5_ib_mr *mr; unsigned int page_size; - page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova); + if (umem->is_dmabuf) + page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova); + else + page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, + 0, iova); if (WARN_ON(!page_size)) return ERR_PTR(-EINVAL); ent = mr_cache_ent_from_order( @@ -980,7 +999,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, mr->mmkey.size = umem->length; mr->mmkey.pd = to_mpd(pd)->pdn; mr->page_shift = order_base_2(page_size); - mr->umem = umem; set_mr_fields(dev, mr, umem->length, access_flags); return mr; @@ -1201,8 +1219,10 @@ int
mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, /* * Send the DMA list to the HW for a normal MR using UMR. + * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP + * flag may be used. */ -static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) +int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) { struct mlx5_ib_dev *dev = mr_to_mdev(mr); struct device *ddev = &dev->mdev->pdev->dev; @@ -1244,6 +1264,10 @@ static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) cur_mtt->ptag = cpu_to_be64(rdma_block_iter_dma_address(&biter) | MLX5_IB_MTT_PRESENT); + + if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP)) + cur_mtt->ptag = 0; + cur_mtt++; } @@ -1528,10 +1552,7 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length, } odp->private = mr; - init_waitqueue_head(&mr->q_deferred_work); - atomic_set(&mr->num_deferred_work, 0); - err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), - &mr->mmkey, GFP_KERNEL)); + err = mlx5r_store_odp_mkey(dev, &mr->mmkey); if (err) goto err_dereg_mr; @@ -1567,6 +1588,81 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return create_real_mr(pd, umem, iova, access_flags); } +static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach) +{ + struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv; + struct mlx5_ib_mr *mr = umem_dmabuf->private; + + dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); + + if (!umem_dmabuf->sgt) + return; + + mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP); + ib_umem_dmabuf_unmap_pages(umem_dmabuf); +} + +static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = { + .allow_peer2peer = 1, + .move_notify = mlx5_ib_dmabuf_invalidate_cb, +}; + +struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset, + u64 length, u64 virt_addr, + int fd, int access_flags, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_mr *mr = NULL; + struct ib_umem_dmabuf *umem_dmabuf; + int err; + + if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || + !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + return ERR_PTR(-EOPNOTSUPP); + + mlx5_ib_dbg(dev, + "offset 0x%llx, virt_addr 0x%llx, length 0x%llx, fd %d, access_flags 0x%x\n", + offset, virt_addr, length, fd, access_flags); + + /* dmabuf requires xlt update via umr to work. 
*/ + if (!mlx5_ib_can_load_pas_with_umr(dev, length)) + return ERR_PTR(-EINVAL); + + umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, offset, length, fd, + access_flags, + &mlx5_ib_dmabuf_attach_ops); + if (IS_ERR(umem_dmabuf)) { + mlx5_ib_dbg(dev, "umem_dmabuf get failed (%ld)\n", + PTR_ERR(umem_dmabuf)); + return ERR_CAST(umem_dmabuf); + } + + mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr, + access_flags); + if (IS_ERR(mr)) { + ib_umem_release(&umem_dmabuf->umem); + return ERR_CAST(mr); + } + + mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); + + atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages); + umem_dmabuf->private = mr; + err = mlx5r_store_odp_mkey(dev, &mr->mmkey); + if (err) + goto err_dereg_mr; + + err = mlx5_ib_init_dmabuf_mr(mr); + if (err) + goto err_dereg_mr; + return &mr->ibmr; + +err_dereg_mr: + dereg_mr(dev, mr); + return ERR_PTR(err); +} + /** * mlx5_mr_cache_invalidate - Fence all DMA on the MR * @mr: The MR to fence @@ -1740,8 +1836,8 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, return ERR_PTR(err); return NULL; } - /* DM or ODP MR's don't have a umem so we can't re-use it */ - if (!mr->umem || is_odp_mr(mr)) + /* DM or ODP MR's don't have a normal umem so we can't re-use it */ + if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr)) goto recreate; /* @@ -1760,10 +1856,10 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, } /* - * DM doesn't have a PAS list so we can't re-use it, odp does but the - * logic around releasing the umem is different + * DM doesn't have a PAS list so we can't re-use it, odp/dmabuf does + * but the logic around releasing the umem is different */ - if (!mr->umem || is_odp_mr(mr)) + if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr)) goto recreate; if (!(new_access_flags & IB_ACCESS_ON_DEMAND) && @@ -1876,6 +1972,8 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) /* Stop all DMA */ if (is_odp_mr(mr)) mlx5_ib_fence_odp_mr(mr); + else if (is_dmabuf_mr(mr)) + mlx5_ib_fence_dmabuf_mr(mr); else clean_mr(dev, mr); @@ -2227,9 +2325,7 @@ int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) } if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - err = xa_err(xa_store(&dev->odp_mkeys, - mlx5_base_mkey(mw->mmkey.key), &mw->mmkey, - GFP_KERNEL)); + err = mlx5r_store_odp_mkey(dev, &mw->mmkey); if (err) goto free_mkey; } @@ -2249,14 +2345,13 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw) struct mlx5_ib_dev *dev = to_mdev(mw->device); struct mlx5_ib_mw *mmw = to_mmw(mw); - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key)); + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && + xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key))) /* - * pagefault_single_data_segment() may be accessing mmw under - * SRCU if the user bound an ODP MR to this MW. + * pagefault_single_data_segment() may be accessing mmw + * if the user bound an ODP MR to this MW. 
*/ - synchronize_srcu(&dev->odp_srcu); - } + mlx5r_deref_wait_odp_mkey(&mmw->mmkey); return mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); } diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index aa2413b50adc..374698186662 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -33,6 +33,8 @@ #include <rdma/ib_umem.h> #include <rdma/ib_umem_odp.h> #include <linux/kernel.h> +#include <linux/dma-buf.h> +#include <linux/dma-resv.h> #include "mlx5_ib.h" #include "cmd.h" @@ -113,7 +115,6 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, * xarray would be protected by the umem_mutex, however that is not * possible. Instead this uses a weaker update-then-lock pattern: * - * srcu_read_lock() * xa_store() * mutex_lock(umem_mutex) * mlx5_ib_update_xlt() @@ -124,12 +125,9 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, * before destroying. * * The umem_mutex provides the acquire/release semantic needed to make - * the xa_store() visible to a racing thread. While SRCU is not - * technically required, using it gives consistent use of the SRCU - * locking around the xarray. + * the xa_store() visible to a racing thread. */ lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex); - lockdep_assert_held(&mr_to_mdev(imr)->odp_srcu); for (; pklm != end; pklm++, idx++) { struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); @@ -205,8 +203,8 @@ static void dma_fence_odp_mr(struct mlx5_ib_mr *mr) } /* - * This must be called after the mr has been removed from implicit_children - * and the SRCU synchronized. NOTE: The MR does not necessarily have to be + * This must be called after the mr has been removed from implicit_children. + * NOTE: The MR does not necessarily have to be * empty here, parallel page faults could have raced with the free process and * added pages to it. 
*/ @@ -216,19 +214,15 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) { struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; - int srcu_key; - /* implicit_child_mr's are not allowed to have deferred work */ - WARN_ON(atomic_read(&mr->num_deferred_work)); + mlx5r_deref_wait_odp_mkey(&mr->mmkey); if (need_imr_xlt) { - srcu_key = srcu_read_lock(&mr_to_mdev(mr)->odp_srcu); mutex_lock(&odp_imr->umem_mutex); mlx5_ib_update_xlt(mr->parent, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); - srcu_read_unlock(&mr_to_mdev(mr)->odp_srcu, srcu_key); } dma_fence_odp_mr(mr); @@ -236,26 +230,16 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) mr->parent = NULL; mlx5_mr_cache_free(mr_to_mdev(mr), mr); ib_umem_odp_release(odp); - if (atomic_dec_and_test(&imr->num_deferred_work)) - wake_up(&imr->q_deferred_work); } static void free_implicit_child_mr_work(struct work_struct *work) { struct mlx5_ib_mr *mr = container_of(work, struct mlx5_ib_mr, odp_destroy.work); + struct mlx5_ib_mr *imr = mr->parent; free_implicit_child_mr(mr, true); -} - -static void free_implicit_child_mr_rcu(struct rcu_head *head) -{ - struct mlx5_ib_mr *mr = - container_of(head, struct mlx5_ib_mr, odp_destroy.rcu); - - /* Freeing a MR is a sleeping operation, so bounce to a work queue */ - INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work); - queue_work(system_unbound_wq, &mr->odp_destroy.work); + mlx5r_deref_odp_mkey(&imr->mmkey); } static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) @@ -264,21 +248,14 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; struct mlx5_ib_mr *imr = mr->parent; - xa_lock(&imr->implicit_children); - /* - * This can race with mlx5_ib_free_implicit_mr(), the first one to - * reach the xa lock wins the race and destroys the MR. - */ - if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_ATOMIC) != - mr) - goto out_unlock; + if (!refcount_inc_not_zero(&imr->mmkey.usecount)) + return; - atomic_inc(&imr->num_deferred_work); - call_srcu(&mr_to_mdev(mr)->odp_srcu, &mr->odp_destroy.rcu, - free_implicit_child_mr_rcu); + xa_erase(&imr->implicit_children, idx); -out_unlock: - xa_unlock(&imr->implicit_children); + /* Freeing a MR is a sleeping operation, so bounce to a work queue */ + INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work); + queue_work(system_unbound_wq, &mr->odp_destroy.work); } static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni, @@ -490,6 +467,12 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, mr->parent = imr; odp->private = mr; + /* + * First refcount is owned by the xarray and second refcount + * is returned to the caller. + */ + refcount_set(&mr->mmkey.usecount, 2); + err = mlx5_ib_update_xlt(mr, 0, MLX5_IMR_MTT_ENTRIES, PAGE_SHIFT, @@ -500,27 +483,28 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, goto out_mr; } - /* - * Once the store to either xarray completes any error unwind has to - * use synchronize_srcu().
Avoid this with xa_reserve() - */ - ret = xa_cmpxchg(&imr->implicit_children, idx, NULL, mr, - GFP_KERNEL); + xa_lock(&imr->implicit_children); + ret = __xa_cmpxchg(&imr->implicit_children, idx, NULL, mr, + GFP_KERNEL); if (unlikely(ret)) { if (xa_is_err(ret)) { ret = ERR_PTR(xa_err(ret)); - goto out_mr; + goto out_lock; } /* * Another thread beat us to creating the child mr, use * theirs. */ - goto out_mr; + refcount_inc(&ret->mmkey.usecount); + goto out_lock; } + xa_unlock(&imr->implicit_children); mlx5_ib_dbg(mr_to_mdev(imr), "key %x mr %p\n", mr->mmkey.key, mr); return mr; +out_lock: + xa_unlock(&imr->implicit_children); out_mr: mlx5_mr_cache_free(mr_to_mdev(imr), mr); out_umem: @@ -559,8 +543,6 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, imr->ibmr.device = &dev->ib_dev; imr->umem = &umem_odp->umem; imr->is_odp_implicit = true; - atomic_set(&imr->num_deferred_work, 0); - init_waitqueue_head(&imr->q_deferred_work); xa_init(&imr->implicit_children); err = mlx5_ib_update_xlt(imr, 0, @@ -572,8 +554,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, if (err) goto out_mr; - err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key), - &imr->mmkey, GFP_KERNEL)); + err = mlx5r_store_odp_mkey(dev, &imr->mmkey); if (err) goto out_mr; @@ -591,60 +572,35 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) { struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); struct mlx5_ib_dev *dev = mr_to_mdev(imr); - struct list_head destroy_list; struct mlx5_ib_mr *mtt; - struct mlx5_ib_mr *tmp; unsigned long idx; - INIT_LIST_HEAD(&destroy_list); - xa_erase(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key)); /* - * This stops the SRCU protected page fault path from touching either - * the imr or any children. The page fault path can only reach the - * children xarray via the imr. - */ - synchronize_srcu(&dev->odp_srcu); - - /* * All work on the prefetch list must be completed, xa_erase() prevented * new work from being created. */ - wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work)); - + mlx5r_deref_wait_odp_mkey(&imr->mmkey); /* * At this point it is forbidden for any other thread to enter * pagefault_mr() on this imr. It is already forbidden to call * pagefault_mr() on an implicit child. Due to this additions to * implicit_children are prevented. + * In addition, any new call to destroy_unused_implicit_child_mr() + * may return immediately. */ /* - * Block destroy_unused_implicit_child_mr() from incrementing - * num_deferred_work. - */ - xa_lock(&imr->implicit_children); - xa_for_each (&imr->implicit_children, idx, mtt) { - __xa_erase(&imr->implicit_children, idx); - list_add(&mtt->odp_destroy.elm, &destroy_list); - } - xa_unlock(&imr->implicit_children); - - /* - * Wait for any concurrent destroy_unused_implicit_child_mr() to - * complete. - */ - wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work)); - - /* * Fence the imr before we destroy the children. This allows us to * skip updating the XLT of the imr during destroy of the child mkey * the imr points to. 
*/ mlx5_mr_cache_invalidate(imr); - list_for_each_entry_safe (mtt, tmp, &destroy_list, odp_destroy.elm) + xa_for_each(&imr->implicit_children, idx, mtt) { + xa_erase(&imr->implicit_children, idx); free_implicit_child_mr(mtt, false); + } mlx5_mr_cache_free(dev, imr); ib_umem_odp_release(odp_imr); @@ -663,13 +619,39 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr) xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); /* Wait for all running page-fault handlers to finish. */ - synchronize_srcu(&mr_to_mdev(mr)->odp_srcu); - - wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work)); + mlx5r_deref_wait_odp_mkey(&mr->mmkey); dma_fence_odp_mr(mr); } +/** + * mlx5_ib_fence_dmabuf_mr - Stop all access to the dmabuf MR + * @mr: to fence + * + * On return no parallel threads will be touching this MR and no DMA will be + * active. + */ +void mlx5_ib_fence_dmabuf_mr(struct mlx5_ib_mr *mr) +{ + struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem); + + /* Prevent new page faults and prefetch requests from succeeding */ + xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); + + mlx5r_deref_wait_odp_mkey(&mr->mmkey); + + dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL); + mlx5_mr_cache_invalidate(mr); + umem_dmabuf->private = NULL; + ib_umem_dmabuf_unmap_pages(umem_dmabuf); + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); + + if (!mr->cache_ent) { + mlx5_core_destroy_mkey(mr_to_mdev(mr)->mdev, &mr->mmkey); + WARN_ON(mr->descs); + } +} + #define MLX5_PF_FLAGS_DOWNGRADE BIT(1) #define MLX5_PF_FLAGS_SNAPSHOT BIT(2) #define MLX5_PF_FLAGS_ENABLE BIT(3) @@ -747,8 +729,10 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr, struct mlx5_ib_mr *mtt; u64 len; + xa_lock(&imr->implicit_children); mtt = xa_load(&imr->implicit_children, idx); if (unlikely(!mtt)) { + xa_unlock(&imr->implicit_children); mtt = implicit_get_child_mr(imr, idx); if (IS_ERR(mtt)) { ret = PTR_ERR(mtt); @@ -756,6 +740,9 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr, } upd_start_idx = min(upd_start_idx, idx); upd_len = idx - upd_start_idx + 1; + } else { + refcount_inc(&mtt->mmkey.usecount); + xa_unlock(&imr->implicit_children); } umem_odp = to_ib_umem_odp(mtt->umem); @@ -764,6 +751,9 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr, ret = pagefault_real_mr(mtt, umem_odp, user_va, len, bytes_mapped, flags); + + mlx5r_deref_odp_mkey(&mtt->mmkey); + if (ret < 0) goto out; user_va += len; @@ -803,6 +793,44 @@ out: return ret; } +static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt, + u32 *bytes_mapped, u32 flags) +{ + struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem); + u32 xlt_flags = 0; + int err; + unsigned int page_size; + + if (flags & MLX5_PF_FLAGS_ENABLE) + xlt_flags |= MLX5_IB_UPD_XLT_ENABLE; + + dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL); + err = ib_umem_dmabuf_map_pages(umem_dmabuf); + if (err) { + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); + return err; + } + + page_size = mlx5_umem_find_best_pgsz(&umem_dmabuf->umem, mkc, + log_page_size, 0, + umem_dmabuf->umem.iova); + if (unlikely(page_size < PAGE_SIZE)) { + ib_umem_dmabuf_unmap_pages(umem_dmabuf); + err = -EINVAL; + } else { + err = mlx5_ib_update_mr_pas(mr, xlt_flags); + } + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); + + if (err) + return err; + + if (bytes_mapped) + *bytes_mapped += bcnt; + + return ib_umem_num_pages(mr->umem); +} + /* * Returns: * -EFAULT: The io_virt->bcnt is not within the MR, it covers pages that are @@ 
-817,10 +845,12 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - lockdep_assert_held(&mr_to_mdev(mr)->odp_srcu); if (unlikely(io_virt < mr->mmkey.iova)) return -EFAULT; + if (mr->umem->is_dmabuf) + return pagefault_dmabuf_mr(mr, bcnt, bytes_mapped, flags); + if (!odp->is_implicit_odp) { u64 user_va; @@ -847,6 +877,16 @@ int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr) return ret >= 0 ? 0 : ret; } +int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr) +{ + int ret; + + ret = pagefault_dmabuf_mr(mr, mr->umem->length, NULL, + MLX5_PF_FLAGS_ENABLE); + + return ret >= 0 ? 0 : ret; +} + struct pf_frame { struct pf_frame *next; u32 key; @@ -896,7 +936,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 *bytes_committed, u32 *bytes_mapped) { - int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0; + int npages = 0, ret, i, outlen, cur_outlen = 0, depth = 0; struct pf_frame *head = NULL, *frame; struct mlx5_core_mkey *mmkey; struct mlx5_ib_mr *mr; @@ -905,14 +945,14 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, size_t offset; int ndescs; - srcu_key = srcu_read_lock(&dev->odp_srcu); - io_virt += *bytes_committed; bcnt -= *bytes_committed; next_mr: + xa_lock(&dev->odp_mkeys); mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(key)); if (!mmkey) { + xa_unlock(&dev->odp_mkeys); mlx5_ib_dbg( dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", @@ -925,12 +965,15 @@ next_mr: * faulted. */ ret = 0; - goto srcu_unlock; + goto end; } + refcount_inc(&mmkey->usecount); + xa_unlock(&dev->odp_mkeys); + if (!mkey_is_eq(mmkey, key)) { mlx5_ib_dbg(dev, "failed to find mkey %x\n", key); ret = -EFAULT; - goto srcu_unlock; + goto end; } switch (mmkey->type) { @@ -939,7 +982,7 @@ next_mr: ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0); if (ret < 0) - goto srcu_unlock; + goto end; mlx5_update_odp_stats(mr, faults, ret); @@ -954,7 +997,7 @@ next_mr: if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) { mlx5_ib_dbg(dev, "indirection level exceeded\n"); ret = -EFAULT; - goto srcu_unlock; + goto end; } outlen = MLX5_ST_SZ_BYTES(query_mkey_out) + @@ -965,7 +1008,7 @@ next_mr: out = kzalloc(outlen, GFP_KERNEL); if (!out) { ret = -ENOMEM; - goto srcu_unlock; + goto end; } cur_outlen = outlen; } @@ -975,7 +1018,7 @@ next_mr: ret = mlx5_core_query_mkey(dev->mdev, mmkey, out, outlen); if (ret) - goto srcu_unlock; + goto end; offset = io_virt - MLX5_GET64(query_mkey_out, out, memory_key_mkey_entry.start_addr); @@ -989,7 +1032,7 @@ next_mr: frame = kzalloc(sizeof(*frame), GFP_KERNEL); if (!frame) { ret = -ENOMEM; - goto srcu_unlock; + goto end; } frame->key = be32_to_cpu(pklm->key); @@ -1008,7 +1051,7 @@ next_mr: default: mlx5_ib_dbg(dev, "wrong mkey type %d\n", mmkey->type); ret = -EFAULT; - goto srcu_unlock; + goto end; } if (head) { @@ -1021,10 +1064,13 @@ next_mr: depth = frame->depth; kfree(frame); + mlx5r_deref_odp_mkey(mmkey); goto next_mr; } -srcu_unlock: +end: + if (mmkey) + mlx5r_deref_odp_mkey(mmkey); while (head) { frame = head; head = frame->next; @@ -1032,7 +1078,6 @@ srcu_unlock: } kfree(out); - srcu_read_unlock(&dev->odp_srcu, srcu_key); *bytes_committed = 0; return ret ? ret : npages; } @@ -1040,16 +1085,18 @@ srcu_unlock: /** * Parse a series of data segments for page fault handling. * - * @pfault contains page fault information. - * @wqe points at the first data segment in the WQE. - * @wqe_end points after the end of the WQE. 
- * @bytes_mapped receives the number of bytes that the function was able to - * map. This allows the caller to decide intelligently whether - * enough memory was mapped to resolve the page fault - * successfully (e.g. enough for the next MTU, or the entire - * WQE). - * @total_wqe_bytes receives the total data size of this WQE in bytes (minus - * the committed bytes). + * @dev: Pointer to mlx5 IB device + * @pfault: contains page fault information. + * @wqe: points at the first data segment in the WQE. + * @wqe_end: points after the end of the WQE. + * @bytes_mapped: receives the number of bytes that the function was able to + * map. This allows the caller to decide intelligently whether + * enough memory was mapped to resolve the page fault + * successfully (e.g. enough for the next MTU, or the entire + * WQE). + * @total_wqe_bytes: receives the total data size of this WQE in bytes (minus + * the committed bytes). + * @receive_queue: receive WQE end of sg list * * Returns the number of pages loaded if positive, zero for an empty WQE, or a * negative error code. @@ -1738,8 +1785,8 @@ static void destroy_prefetch_work(struct prefetch_mr_work *work) u32 i; for (i = 0; i < work->num_sge; ++i) - if (atomic_dec_and_test(&work->frags[i].mr->num_deferred_work)) - wake_up(&work->frags[i].mr->q_deferred_work); + mlx5r_deref_odp_mkey(&work->frags[i].mr->mmkey); + kvfree(work); } @@ -1749,27 +1796,30 @@ get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_core_mkey *mmkey; - struct ib_umem_odp *odp; - struct mlx5_ib_mr *mr; - - lockdep_assert_held(&dev->odp_srcu); + struct mlx5_ib_mr *mr = NULL; + xa_lock(&dev->odp_mkeys); mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey)); if (!mmkey || mmkey->key != lkey || mmkey->type != MLX5_MKEY_MR) - return NULL; + goto end; mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (mr->ibmr.pd != pd) - return NULL; - - odp = to_ib_umem_odp(mr->umem); + if (mr->ibmr.pd != pd) { + mr = NULL; + goto end; + } /* prefetch with write-access must be supported by the MR */ if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE && - !odp->umem.writable) - return NULL; + !mr->umem->writable) { + mr = NULL; + goto end; + } + refcount_inc(&mmkey->usecount); +end: + xa_unlock(&dev->odp_mkeys); return mr; } @@ -1777,17 +1827,12 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) { struct prefetch_mr_work *work = container_of(w, struct prefetch_mr_work, work); - struct mlx5_ib_dev *dev; u32 bytes_mapped = 0; - int srcu_key; int ret; u32 i; /* We rely on IB/core that work is executed if we have num_sge != 0 only. 
*/ WARN_ON(!work->num_sge); - dev = mr_to_mdev(work->frags[0].mr); - /* SRCU should be held when calling to mlx5_odp_populate_xlt() */ - srcu_key = srcu_read_lock(&dev->odp_srcu); for (i = 0; i < work->num_sge; ++i) { ret = pagefault_mr(work->frags[i].mr, work->frags[i].io_virt, work->frags[i].length, &bytes_mapped, @@ -1796,7 +1841,6 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) continue; mlx5_update_odp_stats(work->frags[i].mr, prefetch, ret); } - srcu_read_unlock(&dev->odp_srcu, srcu_key); destroy_prefetch_work(work); } @@ -1820,9 +1864,6 @@ static bool init_prefetch_work(struct ib_pd *pd, work->num_sge = i; return false; } - - /* Keep the MR pointer will valid outside the SRCU */ - atomic_inc(&work->frags[i].mr->num_deferred_work); } work->num_sge = num_sge; return true; @@ -1833,42 +1874,35 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, u32 pf_flags, struct ib_sge *sg_list, u32 num_sge) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); u32 bytes_mapped = 0; - int srcu_key; int ret = 0; u32 i; - srcu_key = srcu_read_lock(&dev->odp_srcu); for (i = 0; i < num_sge; ++i) { struct mlx5_ib_mr *mr; mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey); - if (!mr) { - ret = -ENOENT; - goto out; - } + if (!mr) + return -ENOENT; ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length, &bytes_mapped, pf_flags); - if (ret < 0) - goto out; + if (ret < 0) { + mlx5r_deref_odp_mkey(&mr->mmkey); + return ret; + } mlx5_update_odp_stats(mr, prefetch, ret); + mlx5r_deref_odp_mkey(&mr->mmkey); } - ret = 0; -out: - srcu_read_unlock(&dev->odp_srcu, srcu_key); - return ret; + return 0; } int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); u32 pf_flags = 0; struct prefetch_mr_work *work; - int srcu_key; if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH) pf_flags |= MLX5_PF_FLAGS_DOWNGRADE; @@ -1884,13 +1918,10 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, if (!work) return -ENOMEM; - srcu_key = srcu_read_lock(&dev->odp_srcu); if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) { - srcu_read_unlock(&dev->odp_srcu, srcu_key); destroy_prefetch_work(work); return -EINVAL; } queue_work(system_unbound_wq, &work->work); - srcu_read_unlock(&dev->odp_srcu, srcu_key); return 0; } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0cb7cc642d87..ec4b3f6a8222 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1078,6 +1078,7 @@ static int _create_kernel_qp(struct mlx5_ib_dev *dev, qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc); MLX5_SET(qpc, qpc, uar_page, uar_index); + MLX5_SET(qpc, qpc, ts_format, MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT); MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); /* Set "fast registration enabled" for all kernel QPs */ @@ -1172,10 +1173,72 @@ static void destroy_flow_rule_vport_sq(struct mlx5_ib_sq *sq) sq->flow_rule = NULL; } +static int get_rq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq) +{ + bool fr_supported = + MLX5_CAP_GEN(dev->mdev, rq_ts_format) == + MLX5_RQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING || + MLX5_CAP_GEN(dev->mdev, rq_ts_format) == + MLX5_RQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME; + + if (send_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) { + if (!fr_supported) { + mlx5_ib_dbg(dev, "Free running TS format is not supported\n"); + return -EOPNOTSUPP; + } + return 
MLX5_RQC_TIMESTAMP_FORMAT_FREE_RUNNING; + } + return MLX5_RQC_TIMESTAMP_FORMAT_DEFAULT; +} + +static int get_sq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq) +{ + bool fr_supported = + MLX5_CAP_GEN(dev->mdev, sq_ts_format) == + MLX5_SQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING || + MLX5_CAP_GEN(dev->mdev, sq_ts_format) == + MLX5_SQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME; + + if (send_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) { + if (!fr_supported) { + mlx5_ib_dbg(dev, "Free running TS format is not supported\n"); + return -EOPNOTSUPP; + } + return MLX5_SQC_TIMESTAMP_FORMAT_FREE_RUNNING; + } + return MLX5_SQC_TIMESTAMP_FORMAT_DEFAULT; +} + +static int get_qp_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq, + struct mlx5_ib_cq *recv_cq) +{ + bool fr_supported = + MLX5_CAP_ROCE(dev->mdev, qp_ts_format) == + MLX5_QP_TIMESTAMP_FORMAT_CAP_FREE_RUNNING || + MLX5_CAP_ROCE(dev->mdev, qp_ts_format) == + MLX5_QP_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME; + int ts_format = MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT; + + if (recv_cq && + recv_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) + ts_format = MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING; + + if (send_cq && + send_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) + ts_format = MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING; + + if (ts_format == MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING && + !fr_supported) { + mlx5_ib_dbg(dev, "Free running TS format is not supported\n"); + return -EOPNOTSUPP; + } + return ts_format; +} + static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, struct ib_udata *udata, struct mlx5_ib_sq *sq, void *qpin, - struct ib_pd *pd) + struct ib_pd *pd, struct mlx5_ib_cq *cq) { struct mlx5_ib_ubuffer *ubuffer = &sq->ubuffer; __be64 *pas; @@ -1187,6 +1250,11 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, int err; unsigned int page_offset_quantized; unsigned long page_size; + int ts_format; + + ts_format = get_sq_ts_format(dev, cq); + if (ts_format < 0) + return ts_format; sq->ubuffer.umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr, ubuffer->buf_size, 0); @@ -1215,6 +1283,7 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, if (MLX5_CAP_ETH(dev->mdev, multi_pkt_send_wqe)) MLX5_SET(sqc, sqc, allow_multi_pkt_send_wqe, 1); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); + MLX5_SET(sqc, sqc, ts_format, ts_format); MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index)); MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd)); MLX5_SET(sqc, sqc, tis_lst_sz, 1); @@ -1263,7 +1332,7 @@ static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev, static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, void *qpin, - struct ib_pd *pd) + struct ib_pd *pd, struct mlx5_ib_cq *cq) { struct mlx5_ib_qp *mqp = rq->base.container_mibqp; __be64 *pas; @@ -1274,9 +1343,14 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, struct ib_umem *umem = rq->base.ubuffer.umem; unsigned int page_offset_quantized; unsigned long page_size = 0; + int ts_format; size_t inlen; int err; + ts_format = get_rq_ts_format(dev, cq); + if (ts_format < 0) + return ts_format; + page_size = mlx5_umem_find_best_quantized_pgoff(umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64, @@ -1296,6 +1370,7 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, MLX5_SET(rqc, rqc, vsd, 1); MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); + MLX5_SET(rqc, rqc, 
ts_format, ts_format); MLX5_SET(rqc, rqc, flush_in_error_en, 1); MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index)); MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv)); @@ -1393,10 +1468,10 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, } static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - u32 *in, size_t inlen, - struct ib_pd *pd, + u32 *in, size_t inlen, struct ib_pd *pd, struct ib_udata *udata, - struct mlx5_ib_create_qp_resp *resp) + struct mlx5_ib_create_qp_resp *resp, + struct ib_qp_init_attr *init_attr) { struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; struct mlx5_ib_sq *sq = &raw_packet_qp->sq; @@ -1415,7 +1490,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (err) return err; - err = create_raw_packet_qp_sq(dev, udata, sq, in, pd); + err = create_raw_packet_qp_sq(dev, udata, sq, in, pd, + to_mcq(init_attr->send_cq)); if (err) goto err_destroy_tis; @@ -1437,7 +1513,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING; if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING; - err = create_raw_packet_qp_rq(dev, rq, in, pd); + err = create_raw_packet_qp_rq(dev, rq, in, pd, + to_mcq(init_attr->recv_cq)); if (err) goto err_destroy_sq; @@ -1907,6 +1984,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_cq *recv_cq; unsigned long flags; struct mlx5_ib_qp_base *base; + int ts_format; int mlx5_st; void *qpc; u32 *in; @@ -1944,6 +2022,13 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (ucmd->sq_wqe_count > (1 << MLX5_CAP_GEN(mdev, log_max_qp_sz))) return -EINVAL; + if (init_attr->qp_type != IB_QPT_RAW_PACKET) { + ts_format = get_qp_ts_format(dev, to_mcq(init_attr->send_cq), + to_mcq(init_attr->recv_cq)); + if (ts_format < 0) + return ts_format; + } + err = _create_user_qp(dev, pd, qp, udata, init_attr, &in, &params->resp, &inlen, base, ucmd); if (err) @@ -1992,6 +2077,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt)); } + if (init_attr->qp_type != IB_QPT_RAW_PACKET) + MLX5_SET(qpc, qpc, ts_format, ts_format); + MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr)); if (qp->sq.wqe_cnt) { @@ -2046,7 +2134,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd->sq_buf_addr; raw_packet_qp_copy_info(qp, &qp->raw_packet_qp); err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata, - &params->resp); + &params->resp, init_attr); } else err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out); @@ -2432,9 +2520,6 @@ static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, case MLX5_IB_QPT_HW_GSI: case IB_QPT_DRIVER: case IB_QPT_GSI: - if (dev->profile == &raw_eth_profile) - goto out; - fallthrough; case IB_QPT_RAW_PACKET: case IB_QPT_UD: case MLX5_IB_QPT_REG_UMR: @@ -2629,10 +2714,6 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, int create_flags = attr->create_flags; bool cond; - if (qp->type == IB_QPT_UD && dev->profile == &raw_eth_profile) - if (create_flags & ~MLX5_IB_QP_CREATE_WC_TEST) - return -EINVAL; - if (qp_type == MLX5_IB_QPT_DCT) return (create_flags) ?
-EINVAL : 0; @@ -3076,6 +3157,8 @@ static int ib_to_mlx5_rate_map(u8 rate) return 4; case IB_RATE_50_GBPS: return 5; + case IB_RATE_400_GBPS: + return 6; default: return rate + MLX5_STAT_RATE_OFFSET; } @@ -3183,11 +3266,13 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, alt ? attr->alt_pkey_index : attr->pkey_index); if (ah_flags & IB_AH_GRH) { - if (grh->sgid_index >= - dev->mdev->port_caps[port - 1].gid_table_len) { + const struct ib_port_immutable *immutable; + + immutable = ib_port_immutable_read(&dev->ib_dev, port); + if (grh->sgid_index >= immutable->gid_tbl_len) { pr_err("sgid_index (%u) too large. max is %d\n", grh->sgid_index, - dev->mdev->port_caps[port - 1].gid_table_len); + immutable->gid_tbl_len); return -EINVAL; } } @@ -4211,6 +4296,23 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, return 0; } +static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev, + struct mlx5_ib_qp *qp, + enum ib_qp_type qp_type) +{ + if (dev->profile != &raw_eth_profile) + return true; + + if (qp_type == IB_QPT_RAW_PACKET || qp_type == MLX5_IB_QPT_REG_UMR) + return true; + + /* Internal QP used for wc testing, with NOPs in wq */ + if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST) + return true; + + return false; +} + int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { @@ -4221,7 +4323,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, enum ib_qp_type qp_type; enum ib_qp_state cur_state, new_state; int err = -EINVAL; - int port; + + if (!mlx5_ib_modify_qp_allowed(dev, qp, ibqp->qp_type)) + return -EOPNOTSUPP; if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT)) return -EOPNOTSUPP; @@ -4263,10 +4367,6 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; - if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) { - port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; - } - if (qp->flags & IB_QP_CREATE_SOURCE_QPN) { if (attr_mask & ~(IB_QP_STATE | IB_QP_CUR_STATE)) { mlx5_ib_dbg(dev, "invalid attr_mask 0x%x when underlay QP is used\n", @@ -4295,14 +4395,10 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } - if (attr_mask & IB_QP_PKEY_INDEX) { - port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; - if (attr->pkey_index >= - dev->mdev->port_caps[port - 1].pkey_table_len) { - mlx5_ib_dbg(dev, "invalid pkey index %d\n", - attr->pkey_index); - goto out; - } + if ((attr_mask & IB_QP_PKEY_INDEX) && + attr->pkey_index >= dev->pkey_table_len) { + mlx5_ib_dbg(dev, "invalid pkey index %d\n", attr->pkey_index); + goto out; } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && @@ -5376,7 +5472,7 @@ void mlx5_ib_drain_rq(struct ib_qp *qp) handle_drain_completion(cq, &rdrain, dev); } -/** +/* * Bind a qp to a counter. 
If @counter is NULL then bind the qp to * the default counter */ diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c index d6038fb6c50c..cf2852cba45c 100644 --- a/drivers/infiniband/hw/mlx5/wr.c +++ b/drivers/infiniband/hw/mlx5/wr.c @@ -1369,7 +1369,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, handle_qpt_uc(wr, &seg, &size); break; case IB_QPT_SMI: - if (unlikely(!mdev->port_caps[qp->port - 1].has_smi)) { + if (unlikely(!dev->port_caps[qp->port - 1].has_smi)) { mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n"); err = -EPERM; *bad_wr = wr; diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 9dde70373a55..3cb4febaad0f 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -617,18 +617,18 @@ static inline bool qedr_qp_has_srq(struct qedr_qp *qp) static inline bool qedr_qp_has_sq(struct qedr_qp *qp) { if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_XRC_TGT) - return 0; + return false; - return 1; + return true; } static inline bool qedr_qp_has_rq(struct qedr_qp *qp) { if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT || qedr_qp_has_srq(qp)) - return 0; + return false; - return 1; + return true; } static inline struct qedr_user_mmap_entry * diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c index f5542d703ef9..13e5e6bbec99 100644 --- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c @@ -586,8 +586,8 @@ int qedr_gsi_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id; qedr_inc_sw_prod(&qp->sq); DP_DEBUG(qp->dev, QEDR_MSG_GSI, - "gsi post send: opcode=%d, in_irq=%ld, irqs_disabled=%d, wr_id=%llx\n", - wr->opcode, in_irq(), irqs_disabled(), wr->wr_id); + "gsi post send: opcode=%d, wr_id=%llx\n", wr->opcode, + wr->wr_id); } else { DP_ERR(dev, "gsi post send: failed to transmit (rc=%d)\n", rc); rc = -EAGAIN; diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 92eeea5679e2..84fc4dcc5399 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -151,7 +151,7 @@ int qib_count_units(int *npresentp, int *nupp) /** * qib_wait_linkstate - wait for an IB link state change to occur - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * @state: the state to wait for * @msecs: the number of milliseconds to wait * diff --git a/drivers/infiniband/hw/qib/qib_eeprom.c b/drivers/infiniband/hw/qib/qib_eeprom.c index 5838b3bf34b9..bf660c001b6d 100644 --- a/drivers/infiniband/hw/qib/qib_eeprom.c +++ b/drivers/infiniband/hw/qib/qib_eeprom.c @@ -47,7 +47,7 @@ * qib_eeprom_read - receives bytes from the eeprom via I2C * @dd: the qlogic_ib device * @eeprom_offset: address to read from - * @buffer: where to store result + * @buff: where to store result * @len: number of bytes to receive */ int qib_eeprom_read(struct qib_devdata *dd, u8 eeprom_offset, @@ -94,7 +94,7 @@ static int eeprom_write_with_enable(struct qib_devdata *dd, u8 offset, * qib_eeprom_write - writes data to the eeprom via I2C * @dd: the qlogic_ib device * @eeprom_offset: where to place data - * @buffer: data to write + * @buff: data to write * @len: number of bytes to write */ int qib_eeprom_write(struct qib_devdata *dd, u8 eeprom_offset, diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 
44150be215bf..b35e1174be22 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1223,7 +1223,7 @@ static void qib_set_ib_6120_lstate(struct qib_pportdata *ppd, u16 linkcmd, /** * qib_6120_bringup_serdes - bring up the serdes - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device */ static int qib_6120_bringup_serdes(struct qib_pportdata *ppd) { @@ -1412,7 +1412,7 @@ static void qib_6120_quiet_serdes(struct qib_pportdata *ppd) /** * qib_6120_setup_setextled - set the state of the two external LEDs - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * @on: whether the link is up or not * * The exact combo of LEDs if on is true is determined by looking @@ -1823,7 +1823,7 @@ bail: * qib_6120_put_tid - write a TID in chip * @dd: the qlogic_ib device * @tidptr: pointer to the expected TID (in chip) to update - * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) + * @type: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) * for expected * @pa: physical address of in memory buffer; tidinvalid if freeing * @@ -1890,7 +1890,7 @@ static void qib_6120_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, * qib_6120_put_tid_2 - write a TID in chip, Revision 2 or higher * @dd: the qlogic_ib device * @tidptr: pointer to the expected TID (in chip) to update - * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) + * @type: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) * for expected * @pa: physical address of in memory buffer; tidinvalid if freeing * @@ -1932,7 +1932,7 @@ static void qib_6120_put_tid_2(struct qib_devdata *dd, u64 __iomem *tidptr, /** * qib_6120_clear_tids - clear all TID entries for a context, expected and eager * @dd: the qlogic_ib device - * @ctxt: the context + * @rcd: the context * * clear all TID entries for a context, expected and eager. * Used from qib_close(). On this chip, TIDs are only 32 bits, @@ -2008,7 +2008,7 @@ int __attribute__((weak)) qib_unordered_wc(void) /** * qib_6120_get_base_info - set chip-specific flags for user code * @rcd: the qlogic_ib ctxt - * @kbase: qib_base_info pointer + * @kinfo: qib_base_info pointer * * We set the PCIE flag because the lower bandwidth on PCIe vs * HyperTransport can affect some user packet algorithms. 
@@ -2270,8 +2270,8 @@ static void sendctrl_6120_mod(struct qib_pportdata *ppd, u32 op) /** * qib_portcntr_6120 - read a per-port counter - * @dd: the qlogic_ib device - * @creg: the counter to snapshot + * @ppd: the qlogic_ib device + * @reg: the counter to snapshot */ static u64 qib_portcntr_6120(struct qib_pportdata *ppd, u32 reg) { @@ -2610,7 +2610,7 @@ static void qib_chk_6120_errormask(struct qib_devdata *dd) /** * qib_get_faststats - get word counters from chip before they overflow - * @opaque - contains a pointer to the qlogic_ib device qib_devdata + * @t: contains a pointer to the qlogic_ib device qib_devdata * * This needs more work; in particular, decision on whether we really * need traffic_wds done the way it is diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 0a6f26d4cb31..229dcd6ead95 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -1701,7 +1701,7 @@ static void qib_7220_quiet_serdes(struct qib_pportdata *ppd) /** * qib_setup_7220_setextled - set the state of the two external LEDs - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * @on: whether the link is up or not * * The exact combo of LEDs if on is true is determined by looking @@ -2146,7 +2146,7 @@ bail: * qib_7220_put_tid - write a TID to the chip * @dd: the qlogic_ib device * @tidptr: pointer to the expected TID (in chip) to update - * @tidtype: 0 for eager, 1 for expected + * @type: 0 for eager, 1 for expected * @pa: physical address of in memory buffer; tidinvalid if freeing */ static void qib_7220_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, @@ -2180,7 +2180,7 @@ static void qib_7220_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, /** * qib_7220_clear_tids - clear all TID entries for a ctxt, expected and eager * @dd: the qlogic_ib device - * @ctxt: the ctxt + * @rcd: the ctxt * * clear all TID entries for a ctxt, expected and eager. * Used from qib_close(). On this chip, TIDs are only 32 bits, @@ -2238,7 +2238,7 @@ static void qib_7220_tidtemplate(struct qib_devdata *dd) /** * qib_init_7220_get_base_info - set chip-specific flags for user code * @rcd: the qlogic_ib ctxt - * @kbase: qib_base_info pointer + * @kinfo: qib_base_info pointer * * We set the PCIE flag because the lower bandwidth on PCIe vs * HyperTransport can affect some user packet algorithims. 
@@ -2896,8 +2896,8 @@ static void sendctrl_7220_mod(struct qib_pportdata *ppd, u32 op) /** * qib_portcntr_7220 - read a per-port counter - * @dd: the qlogic_ib device - * @creg: the counter to snapshot + * @ppd: the qlogic_ib device + * @reg: the counter to snapshot */ static u64 qib_portcntr_7220(struct qib_pportdata *ppd, u32 reg) { @@ -3232,7 +3232,7 @@ done: /** * qib_get_7220_faststats - get word counters from chip before they overflow - * @opaque - contains a pointer to the qlogic_ib device qib_devdata + * @t: contains a pointer to the qlogic_ib device qib_devdata * * This needs more work; in particular, decision on whether we really * need traffic_wds done the way it is @@ -4468,7 +4468,7 @@ static int qib_7220_eeprom_wen(struct qib_devdata *dd, int wen) /** * qib_init_iba7220_funcs - set up the chip-specific function pointers - * @dev: the pci_dev for qlogic_ib device + * @pdev: the pci_dev for qlogic_ib device * @ent: pci_device_id struct for this dev * * This is global, and is called directly at init to set up the diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 189a0ce6056a..9fe6ea75b45e 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -2514,7 +2514,7 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd) /** * qib_7322_quiet_serdes - set serdes to txidle - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * Called when driver is being unloaded */ static void qib_7322_mini_quiet_serdes(struct qib_pportdata *ppd) @@ -3760,7 +3760,7 @@ bail: * qib_7322_put_tid - write a TID to the chip * @dd: the qlogic_ib device * @tidptr: pointer to the expected TID (in chip) to update - * @tidtype: 0 for eager, 1 for expected + * @type: 0 for eager, 1 for expected * @pa: physical address of in memory buffer; tidinvalid if freeing */ static void qib_7322_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, @@ -3796,7 +3796,7 @@ static void qib_7322_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, /** * qib_7322_clear_tids - clear all TID entries for a ctxt, expected and eager * @dd: the qlogic_ib device - * @ctxt: the ctxt + * @rcd: the ctxt * * clear all TID entries for a ctxt, expected and eager. * Used from qib_close(). @@ -3861,7 +3861,7 @@ static void qib_7322_tidtemplate(struct qib_devdata *dd) /** * qib_init_7322_get_base_info - set chip-specific flags for user code * @rcd: the qlogic_ib ctxt - * @kbase: qib_base_info pointer + * @kinfo: qib_base_info pointer * * We set the PCIE flag because the lower bandwidth on PCIe vs * HyperTransport can affect some user packet algorithims. 
@@ -4724,7 +4724,7 @@ static void sendctrl_7322_mod(struct qib_pportdata *ppd, u32 op) /** * qib_portcntr_7322 - read a per-port chip counter * @ppd: the qlogic_ib pport - * @creg: the counter to read (not a chip offset) + * @reg: the counter to read (not a chip offset) */ static u64 qib_portcntr_7322(struct qib_pportdata *ppd, u32 reg) { @@ -5096,7 +5096,7 @@ done: /** * qib_get_7322_faststats - get word counters from chip before they overflow - * @opaque - contains a pointer to the qlogic_ib device qib_devdata + * @t: contains a pointer to the qlogic_ib device qib_devdata * * VESTIGIAL IBA7322 has no "small fast counters", so the only * real purpose of this function is to maintain the notion of @@ -7175,7 +7175,7 @@ static int qib_7322_tempsense_rd(struct qib_devdata *dd, int regnum) /** * qib_init_iba7322_funcs - set up the chip-specific function pointers - * @dev: the pci_dev for qlogic_ib device + * @pdev: the pci_dev for qlogic_ib device * @ent: pci_device_id struct for this dev * * Also allocates, inits, and returns the devdata struct for this diff --git a/drivers/infiniband/hw/qib/qib_intr.c b/drivers/infiniband/hw/qib/qib_intr.c index 65c3b964ad1b..85c3187d796d 100644 --- a/drivers/infiniband/hw/qib/qib_intr.c +++ b/drivers/infiniband/hw/qib/qib_intr.c @@ -40,9 +40,9 @@ /** * qib_format_hwmsg - format a single hwerror message - * @msg message buffer - * @msgl length of message buffer - * @hwmsg message to add to message buffer + * @msg: message buffer + * @msgl: length of message buffer + * @hwmsg: message to add to message buffer */ static void qib_format_hwmsg(char *msg, size_t msgl, const char *hwmsg) { @@ -53,11 +53,11 @@ static void qib_format_hwmsg(char *msg, size_t msgl, const char *hwmsg) /** * qib_format_hwerrors - format hardware error messages for display - * @hwerrs hardware errors bit vector - * @hwerrmsgs hardware error descriptions - * @nhwerrmsgs number of hwerrmsgs - * @msg message buffer - * @msgl message buffer length + * @hwerrs: hardware errors bit vector + * @hwerrmsgs: hardware error descriptions + * @nhwerrmsgs: number of hwerrmsgs + * @msg: message buffer + * @msgl: message buffer length */ void qib_format_hwerrors(u64 hwerrs, const struct qib_hwerror_msgs *hwerrmsgs, size_t nhwerrmsgs, char *msg, size_t msgl) diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index f83e331977f8..44e2f813024a 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -886,7 +886,7 @@ done: /** * rm_pkey - decrecment the reference count for the given PKEY - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * @key: the PKEY index * * Return true if this was the last reference and the hardware table entry @@ -916,7 +916,7 @@ bail: /** * add_pkey - add the given PKEY to the hardware table - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * @key: the PKEY * * Return an error code if unable to add the entry, zero if no change, @@ -2346,8 +2346,10 @@ static int process_cc(struct ib_device *ibdev, int mad_flags, * @port: the port number this packet came in on * @in_wc: the work completion entry for this packet * @in_grh: the global route header for this packet - * @in_mad: the incoming MAD - * @out_mad: any outgoing MAD reply + * @in: the incoming MAD + * @out: any outgoing MAD reply + * @out_mad_size: size of the outgoing MAD reply + * @out_mad_pkey_index: unused * * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not * interested in processing. 
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index 2e07b3749b88..cb2a02d671e2 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -181,7 +181,7 @@ void qib_pcie_ddcleanup(struct qib_devdata *dd) pci_set_drvdata(dd->pcidev, NULL); } -/** +/* * We save the msi lo and hi values, so we can restore them after * chip reset (the kernel PCI infrastructure doesn't yet handle that * correctly. diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 8d0563ef5be1..ca39a029e4af 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -207,7 +207,7 @@ bail: return ret; } -/** +/* * qib_free_all_qps - check for QPs still in use */ unsigned qib_free_all_qps(struct rvt_dev_info *rdi) @@ -376,9 +376,9 @@ void qib_flush_qp_waiters(struct rvt_qp *qp) /** * qib_check_send_wqe - validate wr/wqe - * @qp - The qp - * @wqe - The built wqe - * @call_send - Determine if the send should be posted or scheduled + * @qp: The qp + * @wqe: The built wqe + * @call_send: Determine if the send should be posted or scheduled * * Returns 0 on success, -EINVAL on failure */ @@ -418,8 +418,8 @@ static const char * const qp_type_str[] = { /** * qib_qp_iter_print - print information to seq_file - * @s - the seq_file - * @iter - the iterator + * @s: the seq_file + * @iter: the iterator */ void qib_qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter) { diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 3915e5b4a9bc..a1c20ffb4490 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -207,6 +207,7 @@ bail: /** * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) * @qp: a pointer to the QP + * @flags: unused * * Assumes the s_lock is held. * @@ -992,7 +993,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, return wqe; } -/** +/* * do_rc_ack - process an incoming RC ACK * @qp: the QP the ACK came in on * @psn: the packet sequence number of the ACK @@ -1259,6 +1260,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn, * @psn: the packet sequence number for this packet * @hdrsize: the header length * @pmtu: the path MTU + * @rcd: the context pointer * * This is called from qib_rc_rcv() to process an incoming RC response * packet for the given QP. @@ -1480,6 +1482,7 @@ bail: * @opcode: the opcode for this packet * @psn: the packet sequence number for this packet * @diff: the difference between the PSN and the expected PSN + * @rcd: the context pointer * * This is called from qib_rc_rcv() to process an unexpected * incoming RC packet for the given QP. 
diff --git a/drivers/infiniband/hw/qib/qib_twsi.c b/drivers/infiniband/hw/qib/qib_twsi.c index f5698664419b..97b8a2bf5c69 100644 --- a/drivers/infiniband/hw/qib/qib_twsi.c +++ b/drivers/infiniband/hw/qib/qib_twsi.c @@ -168,6 +168,7 @@ static void stop_cmd(struct qib_devdata *dd); /** * rd_byte - read a byte, sending STOP on last, else ACK * @dd: the qlogic_ib device + * @last: identifies the last read * * Returns byte shifted out of device */ diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c index 29785eb84646..6a8148851f21 100644 --- a/drivers/infiniband/hw/qib/qib_tx.c +++ b/drivers/infiniband/hw/qib/qib_tx.c @@ -377,6 +377,7 @@ void qib_sendbuf_done(struct qib_devdata *dd, unsigned n) * @start: the starting send buffer number * @len: the number of send buffers * @avail: true if the buffers are available for kernel use, false otherwise + * @rcd: the context pointer */ void qib_chg_pioavailkernel(struct qib_devdata *dd, unsigned start, unsigned len, u32 avail, struct qib_ctxtdata *rcd) diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 554af4273a13..8e2bda77d8b9 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -40,6 +40,7 @@ /** * qib_make_uc_req - construct a request packet (SEND, RDMA write) * @qp: a pointer to the QP + * @flags: unused * * Assumes the s_lock is held. * diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 93ca21347959..81eda94bd279 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -222,6 +222,7 @@ drop: /** * qib_make_ud_req - construct a UD request packet * @qp: the QP + * @flags: flags to modify and pass back to caller * * Assumes the s_lock is held. * diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c index 4c24e83f3175..5d6cf7427431 100644 --- a/drivers/infiniband/hw/qib/qib_user_pages.c +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -43,7 +43,7 @@ static void __qib_release_user_pages(struct page **p, size_t num_pages, unpin_user_pages_dirty_lock(p, num_pages, dirty); } -/** +/* * qib_map_page - a safety wrapper around pci_map_page() * * A dma_addr of all 0's is interpreted by the chip as "disabled". diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index f6c01bad5a74..8e0de265ad57 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1067,7 +1067,7 @@ bail: /** * qib_get_counters - get various chip counters - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * @cntrs: counters are placed here * * Return the counters needed by recv_pma_get_portcounters(). @@ -1675,7 +1675,7 @@ void qib_unregister_ib_device(struct qib_devdata *dd) /** * _qib_schedule_send - schedule progress - * @qp - the qp + * @qp: the qp * * This schedules progress w/o regard to the s_flags. * @@ -1694,7 +1694,7 @@ bool _qib_schedule_send(struct rvt_qp *qp) /** * qib_schedule_send - schedule progress - * @qp - the qp + * @qp: the qp * * This schedules qp progress. The s_lock * should be held. 
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 00a330909bb3..4b6019e7de67 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -474,7 +474,6 @@ static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id) int ring_slots = (dev->dsr->cq_ring_pages.num_pages - 1) * PAGE_SIZE / sizeof(struct pvrdma_cqne); unsigned int head; - unsigned long flags; dev_dbg(&dev->pdev->dev, "interrupt x (completion) handler\n"); @@ -483,11 +482,11 @@ static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id) struct pvrdma_cq *cq; cqne = get_cqne(dev, head); - spin_lock_irqsave(&dev->cq_tbl_lock, flags); + spin_lock(&dev->cq_tbl_lock); cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq]; if (cq) refcount_inc(&cq->refcnt); - spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); + spin_unlock(&dev->cq_tbl_lock); if (cq && cq->ibcq.comp_handler) cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 20cc0799ac4b..5138afca067f 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -371,7 +371,7 @@ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) return ret; } -/** +/* * rvt_resize_cq - change the size of the CQ * @ibcq: the completion queue * diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c index 108c71e3ac23..fa5be13a4394 100644 --- a/drivers/infiniband/sw/rdmavt/mad.c +++ b/drivers/infiniband/sw/rdmavt/mad.c @@ -56,8 +56,11 @@ * @port_num: the port number this packet came in on, 1 based from ib core * @in_wc: the work completion entry for this packet * @in_grh: the global route header for this packet - * @in_mad: the incoming MAD - * @out_mad: any outgoing MAD reply + * @in: the incoming MAD + * @in_mad_size: size of the incoming MAD reply + * @out: any outgoing MAD reply + * @out_mad_size: size of the outgoing MAD reply + * @out_mad_pkey_index: unused * * Note that the verbs framework has already done the MAD sanity checks, * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c index 5233a63d99a6..951abac13dbb 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.c +++ b/drivers/infiniband/sw/rdmavt/mcast.c @@ -180,7 +180,7 @@ struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid, } EXPORT_SYMBOL(rvt_mcast_find); -/** +/* * rvt_mcast_add - insert mcast GID into table and attach QP struct * @mcast: the mcast GID table * @mqp: the QP to attach diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 90fc234f489a..601d18dda1f5 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -369,6 +369,7 @@ bail: * @pd: protection domain for this memory region * @start: starting userspace address * @length: length of region to register + * @virt_addr: associated virtual address * @mr_access_flags: access flags for this memory region * @udata: unused by the driver * @@ -438,8 +439,8 @@ bail_umem: /** * rvt_dereg_clean_qp_cb - callback from iterator - * @qp - the qp - * @v - the mregion (as u64) + * @qp: the qp + * @v: the mregion (as u64) * * This routine fields the callback for all QPs and * for QPs in the same PD as the MR will call the @@ -457,7 +458,7 @@ static void rvt_dereg_clean_qp_cb(struct rvt_qp *qp, u64 v) /** * 
rvt_dereg_clean_qps - find QPs for reference cleanup - * @mr - the MR that is being deregistered + * @mr: the MR that is being deregistered * * This routine iterates RC QPs looking for references * to the lkey noted in mr. @@ -471,8 +472,8 @@ static void rvt_dereg_clean_qps(struct rvt_mregion *mr) /** * rvt_check_refs - check references - * @mr - the megion - * @t - the caller identification + * @mr: the megion + * @t: the caller identification * * This routine checks MRs holding a reference during * when being de-registered. @@ -506,8 +507,8 @@ static int rvt_check_refs(struct rvt_mregion *mr, const char *t) /** * rvt_mr_has_lkey - is MR - * @mr - the mregion - * @lkey - the lkey + * @mr: the mregion + * @lkey: the lkey */ bool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey) { @@ -516,8 +517,8 @@ bool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey) /** * rvt_ss_has_lkey - is mr in sge tests - * @ss - the sge state - * @lkey + * @ss: the sge state + * @lkey: the lkey * * This code tests for an MR in the indicated * sge state. @@ -540,7 +541,7 @@ bool rvt_ss_has_lkey(struct rvt_sge_state *ss, u32 lkey) /** * rvt_dereg_mr - unregister and free a memory region * @ibmr: the memory region to free - * + * @udata: unused by the driver * * Note that this is called to free MRs created by rvt_get_dma_mr() * or rvt_reg_user_mr(). diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 22fa9bde5419..9d13db68283c 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -156,7 +156,7 @@ void rvt_wss_exit(struct rvt_dev_info *rdi) rdi->wss = NULL; } -/** +/* * rvt_wss_init - Init wss data structures * * Return: 0 on success @@ -323,6 +323,7 @@ static void get_map_page(struct rvt_qpn_table *qpt, /** * init_qpn_table - initialize the QP number table for a device + * @rdi: rvt dev struct * @qpt: the QPN table */ static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt) @@ -524,6 +525,7 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, * IB_QPT_SMI/IB_QPT_GSI * @rdi: rvt device info structure * @qpt: queue pair number table pointer + * @type: the QP type * @port_num: IB port number, 1 based, comes from core * @exclude_prefix: prefix of special queue pair number being allocated * @@ -655,8 +657,8 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) /** * rvt_swqe_has_lkey - return true if lkey is used by swqe - * @wqe - the send wqe - * @lkey - the lkey + * @wqe: the send wqe + * @lkey: the lkey * * Test the swqe for using lkey */ @@ -675,8 +677,8 @@ static bool rvt_swqe_has_lkey(struct rvt_swqe *wqe, u32 lkey) /** * rvt_qp_sends_has_lkey - return true is qp sends use lkey - * @qp - the rvt_qp - * @lkey - the lkey + * @qp: the rvt_qp + * @lkey: the lkey */ static bool rvt_qp_sends_has_lkey(struct rvt_qp *qp, u32 lkey) { @@ -699,8 +701,8 @@ static bool rvt_qp_sends_has_lkey(struct rvt_qp *qp, u32 lkey) /** * rvt_qp_acks_has_lkey - return true if acks have lkey - * @qp - the qp - * @lkey - the lkey + * @qp: the qp + * @lkey: the lkey */ static bool rvt_qp_acks_has_lkey(struct rvt_qp *qp, u32 lkey) { @@ -716,10 +718,10 @@ static bool rvt_qp_acks_has_lkey(struct rvt_qp *qp, u32 lkey) return false; } -/* +/** * rvt_qp_mr_clean - clean up remote ops for lkey - * @qp - the qp - * @lkey - the lkey that is being de-registered + * @qp: the qp + * @lkey: the lkey that is being de-registered * * This routine checks if the lkey is being used by * the qp. 
@@ -853,6 +855,7 @@ bail: /** * rvt_init_qp - initialize the QP state to the reset state + * @rdi: rvt dev struct * @qp: the QP to init or reinit * @type: the QP type * @@ -907,6 +910,7 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, /** * _rvt_reset_qp - initialize the QP state to the reset state + * @rdi: rvt dev struct * @qp: the QP to reset * @type: the QP type * @@ -1726,6 +1730,7 @@ inval: /** * rvt_destroy_qp - destroy a queue pair * @ibqp: the queue pair to destroy + * @udata: unused by the driver * * Note that this can be called while the QP is actively sending or * receiving! @@ -1901,9 +1906,9 @@ int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, /** * rvt_qp_valid_operation - validate post send wr request - * @qp - the qp - * @post-parms - the post send table for the driver - * @wr - the work request + * @qp: the qp + * @post_parms: the post send table for the driver + * @wr: the work request * * The routine validates the operation based on the * validation table an returns the length of the operation @@ -2013,6 +2018,7 @@ static inline int rvt_qp_is_avail( * rvt_post_one_wr - post one RC, UC, or UD send work request * @qp: the QP to post on * @wr: the work request to send + * @call_send: kick the send engine into gear */ static int rvt_post_one_wr(struct rvt_qp *qp, const struct ib_send_wr *wr, @@ -2612,7 +2618,7 @@ EXPORT_SYMBOL(rvt_stop_rc_timers); /** * rvt_stop_rnr_timer - stop an rnr timer - * @qp - the QP + * @qp: the QP * * stop an rnr timer and return if the timer * had been pending. diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index 64d98bf238ab..2a7c2f12d372 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -67,7 +67,7 @@ void rvt_driver_srq_init(struct rvt_dev_info *rdi) /** * rvt_create_srq - create a shared receive queue - * @ibpd: the protection domain of the SRQ to create + * @ibsrq: the protection domain of the SRQ to create * @srq_init_attr: the attributes of the SRQ * @udata: data from libibverbs when creating a user SRQ * @@ -311,7 +311,8 @@ bail_free: return ret; } -/** rvt_query_srq - query srq data +/** + * rvt_query_srq - query srq data * @ibsrq: srq to query * @attr: return info in attr * @@ -330,7 +331,7 @@ int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) /** * rvt_destroy_srq - destory an srq * @ibsrq: srq object to destroy - * + * @udata: user data for libibverbs.so */ int rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 49cec85a372a..8fd0128a9336 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -294,7 +294,7 @@ static int rvt_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) /** * rvt_dealloc_ucontext - Free a user context - * @context - Free this + * @context: Unused */ static void rvt_dealloc_ucontext(struct ib_ucontext *context) { diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 0a1e6393250b..a8ac791a1bb9 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -515,6 +515,7 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify) while ((skb = skb_dequeue(&qp->resp_pkts))) { rxe_drop_ref(qp); kfree_skb(skb); + ib_device_put(qp->ibqp.device); } while ((wqe = queue_head(qp->sq.queue))) { @@ -527,6 +528,17 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, 
bool notify) } } +static void free_pkt(struct rxe_pkt_info *pkt) +{ + struct sk_buff *skb = PKT_TO_SKB(pkt); + struct rxe_qp *qp = pkt->qp; + struct ib_device *dev = qp->ibqp.device; + + kfree_skb(skb); + rxe_drop_ref(qp); + ib_device_put(dev); +} + int rxe_completer(void *arg) { struct rxe_qp *qp = (struct rxe_qp *)arg; @@ -624,11 +636,8 @@ int rxe_completer(void *arg) break; case COMPST_DONE: - if (pkt) { - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; - } + if (pkt) + free_pkt(pkt); goto done; case COMPST_EXIT: @@ -671,12 +680,8 @@ int rxe_completer(void *arg) */ if (qp->comp.started_retry && !qp->comp.timeout_retry) { - if (pkt) { - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; - } - + if (pkt) + free_pkt(pkt); goto done; } @@ -699,13 +704,8 @@ int rxe_completer(void *arg) qp->comp.started_retry = 1; rxe_run_task(&qp->req.task, 0); } - - if (pkt) { - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; - } - + if (pkt) + free_pkt(pkt); goto done; } else { @@ -726,9 +726,7 @@ int rxe_completer(void *arg) mod_timer(&qp->rnr_nak_timer, jiffies + rnrnak_jiffies(aeth_syn(pkt) & ~AETH_TYPE_MASK)); - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; + free_pkt(pkt); goto exit; } else { rxe_counter_inc(rxe, @@ -742,13 +740,8 @@ int rxe_completer(void *arg) WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS); do_complete(qp, wqe); rxe_qp_error(qp); - - if (pkt) { - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; - } - + if (pkt) + free_pkt(pkt); goto exit; } } diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h index 3b483b75dfe3..e432f9e37795 100644 --- a/drivers/infiniband/sw/rxe/rxe_hdr.h +++ b/drivers/infiniband/sw/rxe/rxe_hdr.h @@ -22,7 +22,6 @@ struct rxe_pkt_info { u16 paylen; /* length of bth - icrc */ u8 port_num; /* port pkt received on */ u8 opcode; /* bth opcode of packet */ - u8 offset; /* bth offset from pkt->hdr */ }; /* Macros should be used only for received skb */ @@ -280,134 +279,134 @@ static inline void __bth_set_psn(void *arg, u32 psn) static inline u8 bth_opcode(struct rxe_pkt_info *pkt) { - return __bth_opcode(pkt->hdr + pkt->offset); + return __bth_opcode(pkt->hdr); } static inline void bth_set_opcode(struct rxe_pkt_info *pkt, u8 opcode) { - __bth_set_opcode(pkt->hdr + pkt->offset, opcode); + __bth_set_opcode(pkt->hdr, opcode); } static inline u8 bth_se(struct rxe_pkt_info *pkt) { - return __bth_se(pkt->hdr + pkt->offset); + return __bth_se(pkt->hdr); } static inline void bth_set_se(struct rxe_pkt_info *pkt, int se) { - __bth_set_se(pkt->hdr + pkt->offset, se); + __bth_set_se(pkt->hdr, se); } static inline u8 bth_mig(struct rxe_pkt_info *pkt) { - return __bth_mig(pkt->hdr + pkt->offset); + return __bth_mig(pkt->hdr); } static inline void bth_set_mig(struct rxe_pkt_info *pkt, u8 mig) { - __bth_set_mig(pkt->hdr + pkt->offset, mig); + __bth_set_mig(pkt->hdr, mig); } static inline u8 bth_pad(struct rxe_pkt_info *pkt) { - return __bth_pad(pkt->hdr + pkt->offset); + return __bth_pad(pkt->hdr); } static inline void bth_set_pad(struct rxe_pkt_info *pkt, u8 pad) { - __bth_set_pad(pkt->hdr + pkt->offset, pad); + __bth_set_pad(pkt->hdr, pad); } static inline u8 bth_tver(struct rxe_pkt_info *pkt) { - return __bth_tver(pkt->hdr + pkt->offset); + return __bth_tver(pkt->hdr); } static inline void bth_set_tver(struct rxe_pkt_info *pkt, u8 tver) { - __bth_set_tver(pkt->hdr + pkt->offset, tver); + __bth_set_tver(pkt->hdr, tver); } static inline u16 bth_pkey(struct rxe_pkt_info *pkt) { - return __bth_pkey(pkt->hdr + pkt->offset); + 
return __bth_pkey(pkt->hdr); } static inline void bth_set_pkey(struct rxe_pkt_info *pkt, u16 pkey) { - __bth_set_pkey(pkt->hdr + pkt->offset, pkey); + __bth_set_pkey(pkt->hdr, pkey); } static inline u32 bth_qpn(struct rxe_pkt_info *pkt) { - return __bth_qpn(pkt->hdr + pkt->offset); + return __bth_qpn(pkt->hdr); } static inline void bth_set_qpn(struct rxe_pkt_info *pkt, u32 qpn) { - __bth_set_qpn(pkt->hdr + pkt->offset, qpn); + __bth_set_qpn(pkt->hdr, qpn); } static inline int bth_fecn(struct rxe_pkt_info *pkt) { - return __bth_fecn(pkt->hdr + pkt->offset); + return __bth_fecn(pkt->hdr); } static inline void bth_set_fecn(struct rxe_pkt_info *pkt, int fecn) { - __bth_set_fecn(pkt->hdr + pkt->offset, fecn); + __bth_set_fecn(pkt->hdr, fecn); } static inline int bth_becn(struct rxe_pkt_info *pkt) { - return __bth_becn(pkt->hdr + pkt->offset); + return __bth_becn(pkt->hdr); } static inline void bth_set_becn(struct rxe_pkt_info *pkt, int becn) { - __bth_set_becn(pkt->hdr + pkt->offset, becn); + __bth_set_becn(pkt->hdr, becn); } static inline u8 bth_resv6a(struct rxe_pkt_info *pkt) { - return __bth_resv6a(pkt->hdr + pkt->offset); + return __bth_resv6a(pkt->hdr); } static inline void bth_set_resv6a(struct rxe_pkt_info *pkt) { - __bth_set_resv6a(pkt->hdr + pkt->offset); + __bth_set_resv6a(pkt->hdr); } static inline int bth_ack(struct rxe_pkt_info *pkt) { - return __bth_ack(pkt->hdr + pkt->offset); + return __bth_ack(pkt->hdr); } static inline void bth_set_ack(struct rxe_pkt_info *pkt, int ack) { - __bth_set_ack(pkt->hdr + pkt->offset, ack); + __bth_set_ack(pkt->hdr, ack); } static inline void bth_set_resv7(struct rxe_pkt_info *pkt) { - __bth_set_resv7(pkt->hdr + pkt->offset); + __bth_set_resv7(pkt->hdr); } static inline u32 bth_psn(struct rxe_pkt_info *pkt) { - return __bth_psn(pkt->hdr + pkt->offset); + return __bth_psn(pkt->hdr); } static inline void bth_set_psn(struct rxe_pkt_info *pkt, u32 psn) { - __bth_set_psn(pkt->hdr + pkt->offset, psn); + __bth_set_psn(pkt->hdr, psn); } static inline void bth_init(struct rxe_pkt_info *pkt, u8 opcode, int se, int mig, int pad, u16 pkey, u32 qpn, int ack_req, u32 psn) { - struct rxe_bth *bth = (struct rxe_bth *)(pkt->hdr + pkt->offset); + struct rxe_bth *bth = (struct rxe_bth *)(pkt->hdr); bth->opcode = opcode; bth->flags = (pad << 4) & BTH_PAD_MASK; @@ -448,14 +447,14 @@ static inline void __rdeth_set_een(void *arg, u32 een) static inline u8 rdeth_een(struct rxe_pkt_info *pkt) { - return __rdeth_een(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RDETH]); + return __rdeth_een(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RDETH]); } static inline void rdeth_set_een(struct rxe_pkt_info *pkt, u32 een) { - __rdeth_set_een(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RDETH], een); + __rdeth_set_een(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RDETH], een); } /****************************************************************************** @@ -499,26 +498,26 @@ static inline void __deth_set_sqp(void *arg, u32 sqp) static inline u32 deth_qkey(struct rxe_pkt_info *pkt) { - return __deth_qkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_DETH]); + return __deth_qkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_DETH]); } static inline void deth_set_qkey(struct rxe_pkt_info *pkt, u32 qkey) { - __deth_set_qkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_DETH], qkey); + __deth_set_qkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_DETH], qkey); } static inline u32 deth_sqp(struct rxe_pkt_info *pkt) 
{ - return __deth_sqp(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_DETH]); + return __deth_sqp(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_DETH]); } static inline void deth_set_sqp(struct rxe_pkt_info *pkt, u32 sqp) { - __deth_set_sqp(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_DETH], sqp); + __deth_set_sqp(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_DETH], sqp); } /****************************************************************************** @@ -574,38 +573,38 @@ static inline void __reth_set_len(void *arg, u32 len) static inline u64 reth_va(struct rxe_pkt_info *pkt) { - return __reth_va(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RETH]); + return __reth_va(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RETH]); } static inline void reth_set_va(struct rxe_pkt_info *pkt, u64 va) { - __reth_set_va(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RETH], va); + __reth_set_va(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RETH], va); } static inline u32 reth_rkey(struct rxe_pkt_info *pkt) { - return __reth_rkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RETH]); + return __reth_rkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RETH]); } static inline void reth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey) { - __reth_set_rkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RETH], rkey); + __reth_set_rkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RETH], rkey); } static inline u32 reth_len(struct rxe_pkt_info *pkt) { - return __reth_len(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RETH]); + return __reth_len(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RETH]); } static inline void reth_set_len(struct rxe_pkt_info *pkt, u32 len) { - __reth_set_len(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RETH], len); + __reth_set_len(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RETH], len); } /****************************************************************************** @@ -676,50 +675,50 @@ static inline void __atmeth_set_comp(void *arg, u64 comp) static inline u64 atmeth_va(struct rxe_pkt_info *pkt) { - return __atmeth_va(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); + return __atmeth_va(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); } static inline void atmeth_set_va(struct rxe_pkt_info *pkt, u64 va) { - __atmeth_set_va(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], va); + __atmeth_set_va(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], va); } static inline u32 atmeth_rkey(struct rxe_pkt_info *pkt) { - return __atmeth_rkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); + return __atmeth_rkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); } static inline void atmeth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey) { - __atmeth_set_rkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], rkey); + __atmeth_set_rkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], rkey); } static inline u64 atmeth_swap_add(struct rxe_pkt_info *pkt) { - return __atmeth_swap_add(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); + return __atmeth_swap_add(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); } static inline void atmeth_set_swap_add(struct rxe_pkt_info *pkt, u64 swap_add) { - __atmeth_set_swap_add(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], swap_add); + __atmeth_set_swap_add(pkt->hdr 
+ + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], swap_add); } static inline u64 atmeth_comp(struct rxe_pkt_info *pkt) { - return __atmeth_comp(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); + return __atmeth_comp(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); } static inline void atmeth_set_comp(struct rxe_pkt_info *pkt, u64 comp) { - __atmeth_set_comp(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], comp); + __atmeth_set_comp(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], comp); } /****************************************************************************** @@ -780,26 +779,26 @@ static inline void __aeth_set_msn(void *arg, u32 msn) static inline u8 aeth_syn(struct rxe_pkt_info *pkt) { - return __aeth_syn(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_AETH]); + return __aeth_syn(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_AETH]); } static inline void aeth_set_syn(struct rxe_pkt_info *pkt, u8 syn) { - __aeth_set_syn(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_AETH], syn); + __aeth_set_syn(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_AETH], syn); } static inline u32 aeth_msn(struct rxe_pkt_info *pkt) { - return __aeth_msn(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_AETH]); + return __aeth_msn(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_AETH]); } static inline void aeth_set_msn(struct rxe_pkt_info *pkt, u32 msn) { - __aeth_set_msn(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_AETH], msn); + __aeth_set_msn(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_AETH], msn); } /****************************************************************************** @@ -825,14 +824,14 @@ static inline void __atmack_set_orig(void *arg, u64 orig) static inline u64 atmack_orig(struct rxe_pkt_info *pkt) { - return __atmack_orig(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMACK]); + return __atmack_orig(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMACK]); } static inline void atmack_set_orig(struct rxe_pkt_info *pkt, u64 orig) { - __atmack_set_orig(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMACK], orig); + __atmack_set_orig(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMACK], orig); } /****************************************************************************** @@ -858,14 +857,14 @@ static inline void __immdt_set_imm(void *arg, __be32 imm) static inline __be32 immdt_imm(struct rxe_pkt_info *pkt) { - return __immdt_imm(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_IMMDT]); + return __immdt_imm(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_IMMDT]); } static inline void immdt_set_imm(struct rxe_pkt_info *pkt, __be32 imm) { - __immdt_set_imm(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_IMMDT], imm); + __immdt_set_imm(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_IMMDT], imm); } /****************************************************************************** @@ -891,14 +890,14 @@ static inline void __ieth_set_rkey(void *arg, u32 rkey) static inline u32 ieth_rkey(struct rxe_pkt_info *pkt) { - return __ieth_rkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_IETH]); + return __ieth_rkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_IETH]); } static inline void ieth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey) { - __ieth_set_rkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_IETH], rkey); + __ieth_set_rkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_IETH], 
rkey); } enum rxe_hdr_length { @@ -915,13 +914,12 @@ enum rxe_hdr_length { static inline size_t header_size(struct rxe_pkt_info *pkt) { - return pkt->offset + rxe_opcode[pkt->opcode].length; + return rxe_opcode[pkt->opcode].length; } static inline void *payload_addr(struct rxe_pkt_info *pkt) { - return pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD]; + return pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD]; } static inline size_t payload_size(struct rxe_pkt_info *pkt) diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c index c02315aed8d1..0ea9a5aa4ec0 100644 --- a/drivers/infiniband/sw/rxe/rxe_mcast.c +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c @@ -7,45 +7,61 @@ #include "rxe.h" #include "rxe_loc.h" +/* caller should hold mc_grp_pool->pool_lock */ +static struct rxe_mc_grp *create_grp(struct rxe_dev *rxe, + struct rxe_pool *pool, + union ib_gid *mgid) +{ + int err; + struct rxe_mc_grp *grp; + + grp = rxe_alloc_locked(&rxe->mc_grp_pool); + if (!grp) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&grp->qp_list); + spin_lock_init(&grp->mcg_lock); + grp->rxe = rxe; + rxe_add_key_locked(grp, mgid); + + err = rxe_mcast_add(rxe, mgid); + if (unlikely(err)) { + rxe_drop_key_locked(grp); + rxe_drop_ref(grp); + return ERR_PTR(err); + } + + return grp; +} + int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid, struct rxe_mc_grp **grp_p) { int err; struct rxe_mc_grp *grp; + struct rxe_pool *pool = &rxe->mc_grp_pool; + unsigned long flags; - if (rxe->attr.max_mcast_qp_attach == 0) { - err = -EINVAL; - goto err1; - } + if (rxe->attr.max_mcast_qp_attach == 0) + return -EINVAL; - grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid); + write_lock_irqsave(&pool->pool_lock, flags); + + grp = rxe_pool_get_key_locked(pool, mgid); if (grp) goto done; - grp = rxe_alloc(&rxe->mc_grp_pool); - if (!grp) { - err = -ENOMEM; - goto err1; + grp = create_grp(rxe, pool, mgid); + if (IS_ERR(grp)) { + write_unlock_irqrestore(&pool->pool_lock, flags); + err = PTR_ERR(grp); + return err; } - INIT_LIST_HEAD(&grp->qp_list); - spin_lock_init(&grp->mcg_lock); - grp->rxe = rxe; - - rxe_add_key(grp, mgid); - - err = rxe_mcast_add(rxe, mgid); - if (err) - goto err2; - done: + write_unlock_irqrestore(&pool->pool_lock, flags); *grp_p = grp; return 0; - -err2: - rxe_drop_ref(grp); -err1: - return err; } int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 943914c2a50c..0701bd1ffd1a 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -153,15 +153,16 @@ static struct dst_entry *rxe_find_route(struct net_device *ndev, static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct udphdr *udph; + struct rxe_dev *rxe; struct net_device *ndev = skb->dev; - struct net_device *rdev = ndev; - struct rxe_dev *rxe = rxe_get_dev_from_net(ndev); struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); - if (!rxe && is_vlan_dev(rdev)) { - rdev = vlan_dev_real_dev(ndev); - rxe = rxe_get_dev_from_net(rdev); - } + /* takes a reference on rxe->ib_dev + * drop when skb is freed + */ + rxe = rxe_get_dev_from_net(ndev); + if (!rxe && is_vlan_dev(ndev)) + rxe = rxe_get_dev_from_net(vlan_dev_real_dev(ndev)); if (!rxe) goto drop; @@ -180,12 +181,6 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) rxe_rcv(skb); - /* - * FIXME: this is in the wrong place, it needs to be done when pkt is - * destroyed - */ - 
ib_device_put(&rxe->ib_dev); - return 0; drop: kfree_skb(skb); @@ -414,6 +409,11 @@ int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb) void rxe_loopback(struct sk_buff *skb) { + if (skb->protocol == htons(ETH_P_IP)) + skb_pull(skb, sizeof(struct iphdr)); + else + skb_pull(skb, sizeof(struct ipv6hdr)); + rxe_rcv(skb); } diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index b374eb53e2fe..307d8986e7c9 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -15,21 +15,25 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_UC] = { .name = "rxe-uc", .size = sizeof(struct rxe_ucontext), + .elem_offset = offsetof(struct rxe_ucontext, pelem), .flags = RXE_POOL_NO_ALLOC, }, [RXE_TYPE_PD] = { .name = "rxe-pd", .size = sizeof(struct rxe_pd), + .elem_offset = offsetof(struct rxe_pd, pelem), .flags = RXE_POOL_NO_ALLOC, }, [RXE_TYPE_AH] = { .name = "rxe-ah", .size = sizeof(struct rxe_ah), - .flags = RXE_POOL_ATOMIC | RXE_POOL_NO_ALLOC, + .elem_offset = offsetof(struct rxe_ah, pelem), + .flags = RXE_POOL_NO_ALLOC, }, [RXE_TYPE_SRQ] = { .name = "rxe-srq", .size = sizeof(struct rxe_srq), + .elem_offset = offsetof(struct rxe_srq, pelem), .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, .min_index = RXE_MIN_SRQ_INDEX, .max_index = RXE_MAX_SRQ_INDEX, @@ -37,6 +41,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_QP] = { .name = "rxe-qp", .size = sizeof(struct rxe_qp), + .elem_offset = offsetof(struct rxe_qp, pelem), .cleanup = rxe_qp_cleanup, .flags = RXE_POOL_INDEX, .min_index = RXE_MIN_QP_INDEX, @@ -45,12 +50,14 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_CQ] = { .name = "rxe-cq", .size = sizeof(struct rxe_cq), + .elem_offset = offsetof(struct rxe_cq, pelem), .flags = RXE_POOL_NO_ALLOC, .cleanup = rxe_cq_cleanup, }, [RXE_TYPE_MR] = { .name = "rxe-mr", .size = sizeof(struct rxe_mem), + .elem_offset = offsetof(struct rxe_mem, pelem), .cleanup = rxe_mem_cleanup, .flags = RXE_POOL_INDEX, .max_index = RXE_MAX_MR_INDEX, @@ -59,6 +66,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_MW] = { .name = "rxe-mw", .size = sizeof(struct rxe_mem), + .elem_offset = offsetof(struct rxe_mem, pelem), .flags = RXE_POOL_INDEX, .max_index = RXE_MAX_MW_INDEX, .min_index = RXE_MIN_MW_INDEX, @@ -66,6 +74,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_MC_GRP] = { .name = "rxe-mc_grp", .size = sizeof(struct rxe_mc_grp), + .elem_offset = offsetof(struct rxe_mc_grp, pelem), .cleanup = rxe_mc_cleanup, .flags = RXE_POOL_KEY, .key_offset = offsetof(struct rxe_mc_grp, mgid), @@ -74,7 +83,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_MC_ELEM] = { .name = "rxe-mc_elem", .size = sizeof(struct rxe_mc_elem), - .flags = RXE_POOL_ATOMIC, + .elem_offset = offsetof(struct rxe_mc_elem, pelem), }, }; @@ -94,18 +103,18 @@ static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) goto out; } - pool->max_index = max; - pool->min_index = min; + pool->index.max_index = max; + pool->index.min_index = min; size = BITS_TO_LONGS(max - min + 1) * sizeof(long); - pool->table = kmalloc(size, GFP_KERNEL); - if (!pool->table) { + pool->index.table = kmalloc(size, GFP_KERNEL); + if (!pool->index.table) { err = -ENOMEM; goto out; } - pool->table_size = size; - bitmap_zero(pool->table, max - min + 1); + pool->index.table_size = size; + bitmap_zero(pool->index.table, max - min + 1); out: return err; @@ -127,13 +136,12 @@ int rxe_pool_init( 
pool->max_elem = max_elem; pool->elem_size = ALIGN(size, RXE_POOL_ALIGN); pool->flags = rxe_type_info[type].flags; - pool->tree = RB_ROOT; + pool->index.tree = RB_ROOT; + pool->key.tree = RB_ROOT; pool->cleanup = rxe_type_info[type].cleanup; atomic_set(&pool->num_elem, 0); - kref_init(&pool->ref_cnt); - rwlock_init(&pool->pool_lock); if (rxe_type_info[type].flags & RXE_POOL_INDEX) { @@ -145,67 +153,47 @@ int rxe_pool_init( } if (rxe_type_info[type].flags & RXE_POOL_KEY) { - pool->key_offset = rxe_type_info[type].key_offset; - pool->key_size = rxe_type_info[type].key_size; + pool->key.key_offset = rxe_type_info[type].key_offset; + pool->key.key_size = rxe_type_info[type].key_size; } - pool->state = RXE_POOL_STATE_VALID; - out: return err; } -static void rxe_pool_release(struct kref *kref) -{ - struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt); - - pool->state = RXE_POOL_STATE_INVALID; - kfree(pool->table); -} - -static void rxe_pool_put(struct rxe_pool *pool) -{ - kref_put(&pool->ref_cnt, rxe_pool_release); -} - void rxe_pool_cleanup(struct rxe_pool *pool) { - unsigned long flags; - - write_lock_irqsave(&pool->pool_lock, flags); - pool->state = RXE_POOL_STATE_INVALID; if (atomic_read(&pool->num_elem) > 0) pr_warn("%s pool destroyed with unfree'd elem\n", pool_name(pool)); - write_unlock_irqrestore(&pool->pool_lock, flags); - rxe_pool_put(pool); + kfree(pool->index.table); } static u32 alloc_index(struct rxe_pool *pool) { u32 index; - u32 range = pool->max_index - pool->min_index + 1; + u32 range = pool->index.max_index - pool->index.min_index + 1; - index = find_next_zero_bit(pool->table, range, pool->last); + index = find_next_zero_bit(pool->index.table, range, pool->index.last); if (index >= range) - index = find_first_zero_bit(pool->table, range); + index = find_first_zero_bit(pool->index.table, range); WARN_ON_ONCE(index >= range); - set_bit(index, pool->table); - pool->last = index; - return index + pool->min_index; + set_bit(index, pool->index.table); + pool->index.last = index; + return index + pool->index.min_index; } static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new) { - struct rb_node **link = &pool->tree.rb_node; + struct rb_node **link = &pool->index.tree.rb_node; struct rb_node *parent = NULL; struct rxe_pool_entry *elem; while (*link) { parent = *link; - elem = rb_entry(parent, struct rxe_pool_entry, node); + elem = rb_entry(parent, struct rxe_pool_entry, index_node); if (elem->index == new->index) { pr_warn("element already exists!\n"); @@ -218,25 +206,25 @@ static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new) link = &(*link)->rb_right; } - rb_link_node(&new->node, parent, link); - rb_insert_color(&new->node, &pool->tree); + rb_link_node(&new->index_node, parent, link); + rb_insert_color(&new->index_node, &pool->index.tree); out: return; } static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new) { - struct rb_node **link = &pool->tree.rb_node; + struct rb_node **link = &pool->key.tree.rb_node; struct rb_node *parent = NULL; struct rxe_pool_entry *elem; int cmp; while (*link) { parent = *link; - elem = rb_entry(parent, struct rxe_pool_entry, node); + elem = rb_entry(parent, struct rxe_pool_entry, key_node); - cmp = memcmp((u8 *)elem + pool->key_offset, - (u8 *)new + pool->key_offset, pool->key_size); + cmp = memcmp((u8 *)elem + pool->key.key_offset, + (u8 *)new + pool->key.key_offset, pool->key.key_size); if (cmp == 0) { pr_warn("key already exists!\n"); @@ -249,116 +237,135 @@ static void 
insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new) link = &(*link)->rb_right; } - rb_link_node(&new->node, parent, link); - rb_insert_color(&new->node, &pool->tree); + rb_link_node(&new->key_node, parent, link); + rb_insert_color(&new->key_node, &pool->key.tree); out: return; } -void rxe_add_key(void *arg, void *key) +void __rxe_add_key_locked(struct rxe_pool_entry *elem, void *key) { - struct rxe_pool_entry *elem = arg; struct rxe_pool *pool = elem->pool; - unsigned long flags; - write_lock_irqsave(&pool->pool_lock, flags); - memcpy((u8 *)elem + pool->key_offset, key, pool->key_size); + memcpy((u8 *)elem + pool->key.key_offset, key, pool->key.key_size); insert_key(pool, elem); - write_unlock_irqrestore(&pool->pool_lock, flags); } -void rxe_drop_key(void *arg) +void __rxe_add_key(struct rxe_pool_entry *elem, void *key) { - struct rxe_pool_entry *elem = arg; struct rxe_pool *pool = elem->pool; unsigned long flags; write_lock_irqsave(&pool->pool_lock, flags); - rb_erase(&elem->node, &pool->tree); + __rxe_add_key_locked(elem, key); write_unlock_irqrestore(&pool->pool_lock, flags); } -void rxe_add_index(void *arg) +void __rxe_drop_key_locked(struct rxe_pool_entry *elem) +{ + struct rxe_pool *pool = elem->pool; + + rb_erase(&elem->key_node, &pool->key.tree); +} + +void __rxe_drop_key(struct rxe_pool_entry *elem) { - struct rxe_pool_entry *elem = arg; struct rxe_pool *pool = elem->pool; unsigned long flags; write_lock_irqsave(&pool->pool_lock, flags); + __rxe_drop_key_locked(elem); + write_unlock_irqrestore(&pool->pool_lock, flags); +} + +void __rxe_add_index_locked(struct rxe_pool_entry *elem) +{ + struct rxe_pool *pool = elem->pool; + elem->index = alloc_index(pool); insert_index(pool, elem); - write_unlock_irqrestore(&pool->pool_lock, flags); } -void rxe_drop_index(void *arg) +void __rxe_add_index(struct rxe_pool_entry *elem) { - struct rxe_pool_entry *elem = arg; struct rxe_pool *pool = elem->pool; unsigned long flags; write_lock_irqsave(&pool->pool_lock, flags); - clear_bit(elem->index - pool->min_index, pool->table); - rb_erase(&elem->node, &pool->tree); + __rxe_add_index_locked(elem); write_unlock_irqrestore(&pool->pool_lock, flags); } -void *rxe_alloc(struct rxe_pool *pool) +void __rxe_drop_index_locked(struct rxe_pool_entry *elem) { - struct rxe_pool_entry *elem; - unsigned long flags; + struct rxe_pool *pool = elem->pool; - might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC)); + clear_bit(elem->index - pool->index.min_index, pool->index.table); + rb_erase(&elem->index_node, &pool->index.tree); +} - read_lock_irqsave(&pool->pool_lock, flags); - if (pool->state != RXE_POOL_STATE_VALID) { - read_unlock_irqrestore(&pool->pool_lock, flags); - return NULL; - } - kref_get(&pool->ref_cnt); - read_unlock_irqrestore(&pool->pool_lock, flags); +void __rxe_drop_index(struct rxe_pool_entry *elem) +{ + struct rxe_pool *pool = elem->pool; + unsigned long flags; + + write_lock_irqsave(&pool->pool_lock, flags); + __rxe_drop_index_locked(elem); + write_unlock_irqrestore(&pool->pool_lock, flags); +} - if (!ib_device_try_get(&pool->rxe->ib_dev)) - goto out_put_pool; +void *rxe_alloc_locked(struct rxe_pool *pool) +{ + struct rxe_type_info *info = &rxe_type_info[pool->type]; + struct rxe_pool_entry *elem; + u8 *obj; if (atomic_inc_return(&pool->num_elem) > pool->max_elem) goto out_cnt; - elem = kzalloc(rxe_type_info[pool->type].size, - (pool->flags & RXE_POOL_ATOMIC) ? 
- GFP_ATOMIC : GFP_KERNEL); - if (!elem) + obj = kzalloc(info->size, GFP_ATOMIC); + if (!obj) goto out_cnt; + elem = (struct rxe_pool_entry *)(obj + info->elem_offset); + elem->pool = pool; kref_init(&elem->ref_cnt); - return elem; + return obj; out_cnt: atomic_dec(&pool->num_elem); - ib_device_put(&pool->rxe->ib_dev); -out_put_pool: - rxe_pool_put(pool); return NULL; } -int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem) +void *rxe_alloc(struct rxe_pool *pool) { - unsigned long flags; + struct rxe_type_info *info = &rxe_type_info[pool->type]; + struct rxe_pool_entry *elem; + u8 *obj; + + if (atomic_inc_return(&pool->num_elem) > pool->max_elem) + goto out_cnt; - might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC)); + obj = kzalloc(info->size, GFP_KERNEL); + if (!obj) + goto out_cnt; - read_lock_irqsave(&pool->pool_lock, flags); - if (pool->state != RXE_POOL_STATE_VALID) { - read_unlock_irqrestore(&pool->pool_lock, flags); - return -EINVAL; - } - kref_get(&pool->ref_cnt); - read_unlock_irqrestore(&pool->pool_lock, flags); + elem = (struct rxe_pool_entry *)(obj + info->elem_offset); - if (!ib_device_try_get(&pool->rxe->ib_dev)) - goto out_put_pool; + elem->pool = pool; + kref_init(&elem->ref_cnt); + + return obj; + +out_cnt: + atomic_dec(&pool->num_elem); + return NULL; +} +int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem) +{ if (atomic_inc_return(&pool->num_elem) > pool->max_elem) goto out_cnt; @@ -369,9 +376,6 @@ int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem) out_cnt: atomic_dec(&pool->num_elem); - ib_device_put(&pool->rxe->ib_dev); -out_put_pool: - rxe_pool_put(pool); return -EINVAL; } @@ -380,67 +384,77 @@ void rxe_elem_release(struct kref *kref) struct rxe_pool_entry *elem = container_of(kref, struct rxe_pool_entry, ref_cnt); struct rxe_pool *pool = elem->pool; + struct rxe_type_info *info = &rxe_type_info[pool->type]; + u8 *obj; if (pool->cleanup) pool->cleanup(elem); - if (!(pool->flags & RXE_POOL_NO_ALLOC)) - kfree(elem); + if (!(pool->flags & RXE_POOL_NO_ALLOC)) { + obj = (u8 *)elem - info->elem_offset; + kfree(obj); + } + atomic_dec(&pool->num_elem); - ib_device_put(&pool->rxe->ib_dev); - rxe_pool_put(pool); } -void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) +void *rxe_pool_get_index_locked(struct rxe_pool *pool, u32 index) { - struct rb_node *node = NULL; - struct rxe_pool_entry *elem = NULL; - unsigned long flags; - - read_lock_irqsave(&pool->pool_lock, flags); - - if (pool->state != RXE_POOL_STATE_VALID) - goto out; + struct rxe_type_info *info = &rxe_type_info[pool->type]; + struct rb_node *node; + struct rxe_pool_entry *elem; + u8 *obj; - node = pool->tree.rb_node; + node = pool->index.tree.rb_node; while (node) { - elem = rb_entry(node, struct rxe_pool_entry, node); + elem = rb_entry(node, struct rxe_pool_entry, index_node); if (elem->index > index) node = node->rb_left; else if (elem->index < index) node = node->rb_right; - else { - kref_get(&elem->ref_cnt); + else break; - } } -out: - read_unlock_irqrestore(&pool->pool_lock, flags); - return node ? 
elem : NULL; + if (node) { + kref_get(&elem->ref_cnt); + obj = (u8 *)elem - info->elem_offset; + } else { + obj = NULL; + } + + return obj; } -void *rxe_pool_get_key(struct rxe_pool *pool, void *key) +void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) { - struct rb_node *node = NULL; - struct rxe_pool_entry *elem = NULL; - int cmp; + u8 *obj; unsigned long flags; read_lock_irqsave(&pool->pool_lock, flags); + obj = rxe_pool_get_index_locked(pool, index); + read_unlock_irqrestore(&pool->pool_lock, flags); - if (pool->state != RXE_POOL_STATE_VALID) - goto out; + return obj; +} - node = pool->tree.rb_node; +void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key) +{ + struct rxe_type_info *info = &rxe_type_info[pool->type]; + struct rb_node *node; + struct rxe_pool_entry *elem; + u8 *obj; + int cmp; + + node = pool->key.tree.rb_node; while (node) { - elem = rb_entry(node, struct rxe_pool_entry, node); + elem = rb_entry(node, struct rxe_pool_entry, key_node); - cmp = memcmp((u8 *)elem + pool->key_offset, - key, pool->key_size); + cmp = memcmp((u8 *)elem + pool->key.key_offset, + key, pool->key.key_size); if (cmp > 0) node = node->rb_left; @@ -450,10 +464,24 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key) break; } - if (node) + if (node) { kref_get(&elem->ref_cnt); + obj = (u8 *)elem - info->elem_offset; + } else { + obj = NULL; + } -out: + return obj; +} + +void *rxe_pool_get_key(struct rxe_pool *pool, void *key) +{ + u8 *obj; + unsigned long flags; + + read_lock_irqsave(&pool->pool_lock, flags); + obj = rxe_pool_get_key_locked(pool, key); read_unlock_irqrestore(&pool->pool_lock, flags); - return node ? elem : NULL; + + return obj; } diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index 432745ffc8d4..61210b300a78 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -11,7 +11,6 @@ #define RXE_POOL_CACHE_FLAGS (0) enum rxe_pool_flags { - RXE_POOL_ATOMIC = BIT(0), RXE_POOL_INDEX = BIT(1), RXE_POOL_KEY = BIT(2), RXE_POOL_NO_ALLOC = BIT(4), @@ -36,6 +35,7 @@ struct rxe_pool_entry; struct rxe_type_info { const char *name; size_t size; + size_t elem_offset; void (*cleanup)(struct rxe_pool_entry *obj); enum rxe_pool_flags flags; u32 max_index; @@ -46,18 +46,16 @@ struct rxe_type_info { extern struct rxe_type_info rxe_type_info[]; -enum rxe_pool_state { - RXE_POOL_STATE_INVALID, - RXE_POOL_STATE_VALID, -}; - struct rxe_pool_entry { struct rxe_pool *pool; struct kref ref_cnt; struct list_head list; - /* only used if indexed or keyed */ - struct rb_node node; + /* only used if keyed */ + struct rb_node key_node; + + /* only used if indexed */ + struct rb_node index_node; u32 index; }; @@ -65,24 +63,29 @@ struct rxe_pool { struct rxe_dev *rxe; rwlock_t pool_lock; /* protects pool add/del/search */ size_t elem_size; - struct kref ref_cnt; void (*cleanup)(struct rxe_pool_entry *obj); - enum rxe_pool_state state; enum rxe_pool_flags flags; enum rxe_elem_type type; unsigned int max_elem; atomic_t num_elem; - /* only used if indexed or keyed */ - struct rb_root tree; - unsigned long *table; - size_t table_size; - u32 max_index; - u32 min_index; - u32 last; - size_t key_offset; - size_t key_size; + /* only used if indexed */ + struct { + struct rb_root tree; + unsigned long *table; + size_t table_size; + u32 last; + u32 max_index; + u32 min_index; + } index; + + /* only used if keyed */ + struct { + struct rb_root tree; + size_t key_offset; + size_t key_size; + } key; }; /* initialize a pool of objects 
with given limit on @@ -95,32 +98,70 @@ int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool, /* free resources from object pool */ void rxe_pool_cleanup(struct rxe_pool *pool); -/* allocate an object from pool */ +/* allocate an object from pool holding and not holding the pool lock */ +void *rxe_alloc_locked(struct rxe_pool *pool); + void *rxe_alloc(struct rxe_pool *pool); /* connect already allocated object to pool */ -int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem); +int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem); + +#define rxe_add_to_pool(pool, obj) __rxe_add_to_pool(pool, &(obj)->pelem) /* assign an index to an indexed object and insert object into - * pool's rb tree + * pool's rb tree holding and not holding the pool_lock */ -void rxe_add_index(void *elem); +void __rxe_add_index_locked(struct rxe_pool_entry *elem); + +#define rxe_add_index_locked(obj) __rxe_add_index_locked(&(obj)->pelem) -/* drop an index and remove object from rb tree */ -void rxe_drop_index(void *elem); +void __rxe_add_index(struct rxe_pool_entry *elem); + +#define rxe_add_index(obj) __rxe_add_index(&(obj)->pelem) + +/* drop an index and remove object from rb tree + * holding and not holding the pool_lock + */ +void __rxe_drop_index_locked(struct rxe_pool_entry *elem); + +#define rxe_drop_index_locked(obj) __rxe_drop_index_locked(&(obj)->pelem) + +void __rxe_drop_index(struct rxe_pool_entry *elem); + +#define rxe_drop_index(obj) __rxe_drop_index(&(obj)->pelem) /* assign a key to a keyed object and insert object into - * pool's rb tree + * pool's rb tree holding and not holding pool_lock */ -void rxe_add_key(void *elem, void *key); +void __rxe_add_key_locked(struct rxe_pool_entry *elem, void *key); + +#define rxe_add_key_locked(obj, key) __rxe_add_key_locked(&(obj)->pelem, key) + +void __rxe_add_key(struct rxe_pool_entry *elem, void *key); + +#define rxe_add_key(obj, key) __rxe_add_key(&(obj)->pelem, key) + +/* remove elem from rb tree holding and not holding the pool_lock */ +void __rxe_drop_key_locked(struct rxe_pool_entry *elem); -/* remove elem from rb tree */ -void rxe_drop_key(void *elem); +#define rxe_drop_key_locked(obj) __rxe_drop_key_locked(&(obj)->pelem) + +void __rxe_drop_key(struct rxe_pool_entry *elem); + +#define rxe_drop_key(obj) __rxe_drop_key(&(obj)->pelem) + +/* lookup an indexed object from index holding and not holding the pool_lock. + * takes a reference on object + */ +void *rxe_pool_get_index_locked(struct rxe_pool *pool, u32 index); -/* lookup an indexed object from index. takes a reference on object */ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index); -/* lookup keyed object from key. takes a reference on the object */ +/* lookup keyed object from key holding and not holding the pool_lock. 
+ * takes a reference on the objecti + */ +void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key); + void *rxe_pool_get_key(struct rxe_pool *pool, void *key); /* cleanup an object when all references are dropped */ diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 656a5b4be847..34ae957a315c 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -62,6 +62,17 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) struct rxe_port *port; int port_num = init->port_num; + switch (init->qp_type) { + case IB_QPT_SMI: + case IB_QPT_GSI: + case IB_QPT_RC: + case IB_QPT_UC: + case IB_QPT_UD: + break; + default: + return -EOPNOTSUPP; + } + if (!init->recv_cq || !init->send_cq) { pr_warn("missing cq\n"); goto err1; diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index c9984a28eecc..45d2f711bce2 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -9,21 +9,26 @@ #include "rxe.h" #include "rxe_loc.h" +/* check that QP matches packet opcode type and is in a valid state */ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct rxe_qp *qp) { + unsigned int pkt_type; + if (unlikely(!qp->valid)) goto err1; + pkt_type = pkt->opcode & 0xe0; + switch (qp_type(qp)) { case IB_QPT_RC: - if (unlikely((pkt->opcode & IB_OPCODE_RC) != 0)) { + if (unlikely(pkt_type != IB_OPCODE_RC)) { pr_warn_ratelimited("bad qp type\n"); goto err1; } break; case IB_QPT_UC: - if (unlikely(!(pkt->opcode & IB_OPCODE_UC))) { + if (unlikely(pkt_type != IB_OPCODE_UC)) { pr_warn_ratelimited("bad qp type\n"); goto err1; } @@ -31,7 +36,7 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, case IB_QPT_UD: case IB_QPT_SMI: case IB_QPT_GSI: - if (unlikely(!(pkt->opcode & IB_OPCODE_UD))) { + if (unlikely(pkt_type != IB_OPCODE_UD)) { pr_warn_ratelimited("bad qp type\n"); goto err1; } @@ -85,8 +90,7 @@ static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, goto err1; } - if ((qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) && - pkt->mask) { + if (qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) { u32 qkey = (qpn == 1) ? GSI_QKEY : qp->attr.qkey; if (unlikely(deth_qkey(pkt) != qkey)) { @@ -252,7 +256,6 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) list_for_each_entry(mce, &mcg->qp_list, qp_list) { qp = mce->qp; - pkt = SKB_TO_PKT(skb); /* validate qp for incoming packet */ err = check_type_state(rxe, pkt, qp); @@ -264,12 +267,22 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) continue; /* for all but the last qp create a new clone of the - * skb and pass to the qp. + * skb and pass to the qp. If an error occurs in the + * checks for the last qp in the list we need to + * free the skb since it hasn't been passed on to + * rxe_rcv_pkt() which would free it later. */ - if (mce->qp_list.next != &mcg->qp_list) + if (mce->qp_list.next != &mcg->qp_list) { per_qp_skb = skb_clone(skb, GFP_ATOMIC); - else + if (WARN_ON(!ib_device_try_get(&rxe->ib_dev))) { + kfree_skb(per_qp_skb); + continue; + } + } else { per_qp_skb = skb; + /* show we have consumed the skb */ + skb = NULL; + } if (unlikely(!per_qp_skb)) continue; @@ -284,10 +297,10 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. 
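The rxe_pool rework above replaces the old void-pointer helpers with __rxe_* functions that operate on an embedded struct rxe_pool_entry, thin macros that take the containing object and add the &(obj)->pelem step themselves, and lookups split into a *_locked body plus a wrapper that only brackets it with pool_lock. Below is a minimal user-space sketch of that shape, not rxe code: pool_entry, obj_pool and my_qp are invented names, a pthread rwlock stands in for pool_lock, and a flat slot array stands in for the rb-tree; the elem_offset subtraction mirrors how the locked lookup recovers the containing object from the embedded entry.

#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

struct pool_entry {                             /* analog of rxe_pool_entry */
        unsigned int index;
        int ref_cnt;
};

struct obj_pool {                               /* index side of a pool */
        pthread_rwlock_t lock;                  /* stands in for pool_lock */
        size_t elem_offset;                     /* offset of pool_entry in the object */
        struct pool_entry *slots[8];            /* flat stand-in for the rb-tree */
};

/* body that assumes the caller already holds pool->lock */
static void *pool_get_index_locked(struct obj_pool *pool, unsigned int index)
{
        struct pool_entry *elem;

        if (index >= 8 || !(elem = pool->slots[index]))
                return NULL;
        elem->ref_cnt++;                        /* kref_get() analog */
        return (char *)elem - pool->elem_offset;  /* entry -> containing object */
}

/* thin wrapper that only takes and drops the lock around the locked body */
static void *pool_get_index(struct obj_pool *pool, unsigned int index)
{
        void *obj;

        pthread_rwlock_rdlock(&pool->lock);
        obj = pool_get_index_locked(pool, index);
        pthread_rwlock_unlock(&pool->lock);
        return obj;
}

static int __pool_add(struct obj_pool *pool, struct pool_entry *elem,
                      unsigned int index)
{
        elem->index = index;
        elem->ref_cnt = 1;
        pool->slots[index] = elem;
        return 0;
}

/* the macro hides the &(obj)->pelem step, like rxe_add_to_pool() does */
#define pool_add(pool, obj, index) __pool_add(pool, &(obj)->pelem, index)

struct my_qp {                                  /* any object embedding a pool_entry */
        int qpn;
        struct pool_entry pelem;
};

int main(void)
{
        struct obj_pool pool = {
                .lock = PTHREAD_RWLOCK_INITIALIZER,
                .elem_offset = offsetof(struct my_qp, pelem),
        };
        struct my_qp qp = { .qpn = 17 };
        struct my_qp *found;

        pool_add(&pool, &qp, 3);
        found = pool_get_index(&pool, 3);
        printf("found qpn %d\n", found ? found->qpn : -1);
        return 0;
}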
*/ - return; - err1: + /* free skb if not consumed */ kfree_skb(skb); + ib_device_put(&rxe->ib_dev); } /** @@ -340,9 +353,7 @@ void rxe_rcv(struct sk_buff *skb) __be32 *icrcp; u32 calc_icrc, pack_icrc; - pkt->offset = 0; - - if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES)) + if (unlikely(skb->len < RXE_BTH_BYTES)) goto drop; if (rxe_chk_dgid(rxe, skb) < 0) { @@ -397,4 +408,5 @@ drop: rxe_drop_ref(pkt->qp); kfree_skb(skb); + ib_device_put(&rxe->ib_dev); } diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index d4917646641a..889290793d75 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -375,7 +375,6 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, pkt->psn = qp->req.psn; pkt->mask = rxe_opcode[opcode].mask; pkt->paylen = paylen; - pkt->offset = 0; pkt->wqe = wqe; /* init skb */ diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index c7e3b6a4af38..142f3d8014d8 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -99,6 +99,7 @@ static inline enum resp_states get_req(struct rxe_qp *qp, while ((skb = skb_dequeue(&qp->req_pkts))) { rxe_drop_ref(qp); kfree_skb(skb); + ib_device_put(qp->ibqp.device); } /* go drain recv wr queue */ @@ -585,11 +586,10 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, ack->qp = qp; ack->opcode = opcode; ack->mask = rxe_opcode[opcode].mask; - ack->offset = pkt->offset; ack->paylen = paylen; /* fill in bth using the request packet headers */ - memcpy(ack->hdr, pkt->hdr, pkt->offset + RXE_BTH_BYTES); + memcpy(ack->hdr, pkt->hdr, RXE_BTH_BYTES); bth_set_opcode(ack, opcode); bth_set_qpn(ack, qp->attr.dest_qp_num); @@ -1017,6 +1017,7 @@ static enum resp_states cleanup(struct rxe_qp *qp, skb = skb_dequeue(&qp->req_pkts); rxe_drop_ref(qp); kfree_skb(skb); + ib_device_put(qp->ibqp.device); } if (qp->resp.mr) { @@ -1181,6 +1182,7 @@ static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify) while ((skb = skb_dequeue(&qp->req_pkts))) { rxe_drop_ref(qp); kfree_skb(skb); + ib_device_put(qp->ibqp.device); } if (notify) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index a031514e2f41..dee5e0e919d2 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -106,12 +106,12 @@ static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev, return IB_LINK_LAYER_ETHERNET; } -static int rxe_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) +static int rxe_alloc_ucontext(struct ib_ucontext *ibuc, struct ib_udata *udata) { - struct rxe_dev *rxe = to_rdev(uctx->device); - struct rxe_ucontext *uc = to_ruc(uctx); + struct rxe_dev *rxe = to_rdev(ibuc->device); + struct rxe_ucontext *uc = to_ruc(ibuc); - return rxe_add_to_pool(&rxe->uc_pool, &uc->pelem); + return rxe_add_to_pool(&rxe->uc_pool, uc); } static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc) @@ -145,7 +145,7 @@ static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); - return rxe_add_to_pool(&rxe->pd_pool, &pd->pelem); + return rxe_add_to_pool(&rxe->pd_pool, pd); } static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) @@ -169,7 +169,7 @@ static int rxe_create_ah(struct ib_ah *ibah, if (err) return err; - err = rxe_add_to_pool(&rxe->ah_pool, &ah->pelem); + err = rxe_add_to_pool(&rxe->ah_pool, ah); if (err) return err; @@ -273,7 
+273,7 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init, if (err) goto err1; - err = rxe_add_to_pool(&rxe->srq_pool, &srq->pelem); + err = rxe_add_to_pool(&rxe->srq_pool, srq); if (err) goto err1; @@ -555,37 +555,42 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, } } -static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, +static void copy_inline_data_to_wqe(struct rxe_send_wqe *wqe, + const struct ib_send_wr *ibwr) +{ + struct ib_sge *sge = ibwr->sg_list; + u8 *p = wqe->dma.inline_data; + int i; + + for (i = 0; i < ibwr->num_sge; i++, sge++) { + memcpy(p, (void *)(uintptr_t)sge->addr, sge->length); + p += sge->length; + } +} + +static void init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, unsigned int mask, unsigned int length, struct rxe_send_wqe *wqe) { int num_sge = ibwr->num_sge; - struct ib_sge *sge; - int i; - u8 *p; init_send_wr(qp, &wqe->wr, ibwr); + /* local operation */ + if (unlikely(mask & WR_REG_MASK)) { + wqe->mask = mask; + wqe->state = wqe_state_posted; + return; + } + if (qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_SMI || qp_type(qp) == IB_QPT_GSI) memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av)); - if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) { - p = wqe->dma.inline_data; - - sge = ibwr->sg_list; - for (i = 0; i < num_sge; i++, sge++) { - memcpy(p, (void *)(uintptr_t)sge->addr, - sge->length); - - p += sge->length; - } - } else if (mask & WR_REG_MASK) { - wqe->mask = mask; - wqe->state = wqe_state_posted; - return 0; - } else + if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) + copy_inline_data_to_wqe(wqe, ibwr); + else memcpy(wqe->dma.sge, ibwr->sg_list, num_sge * sizeof(struct ib_sge)); @@ -599,8 +604,6 @@ static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, wqe->dma.sge_offset = 0; wqe->state = wqe_state_posted; wqe->ssn = atomic_add_return(1, &qp->ssn); - - return 0; } static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, @@ -623,10 +626,7 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, } send_wqe = producer_addr(sq->queue); - - err = init_send_wqe(qp, ibwr, mask, length, send_wqe); - if (unlikely(err)) - goto err1; + init_send_wqe(qp, ibwr, mask, length, send_wqe); advance_producer(sq->queue); spin_unlock_irqrestore(&qp->sq.sq_lock, flags); @@ -774,7 +774,7 @@ static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, if (err) return err; - return rxe_add_to_pool(&rxe->cq_pool, &cq->pelem); + return rxe_add_to_pool(&rxe->cq_pool, cq); } static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) @@ -1118,7 +1118,7 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) struct ib_device *dev = &rxe->ib_dev; struct crypto_shash *tfm; - strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc)); + strscpy(dev->node_desc, "rxe", sizeof(dev->node_desc)); dev->node_type = RDMA_NODE_IB_CA; dev->phys_port_cnt = 1; diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index adda78996219..368959ae9a8c 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -653,7 +653,7 @@ static inline struct siw_sqe *orq_get_free(struct siw_qp *qp) { struct siw_sqe *orq_e = orq_get_tail(qp); - if (orq_e && READ_ONCE(orq_e->flags) == 0) + if (READ_ONCE(orq_e->flags) == 0) return orq_e; return NULL; diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 
ee95cf29179d..cf55326f2ab4 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -135,7 +135,7 @@ static struct { static int siw_init_cpulist(void) { - int i, num_nodes = num_possible_nodes(); + int i, num_nodes = nr_node_ids; memset(siw_tx_thread, 0, sizeof(siw_tx_thread)); @@ -357,7 +357,7 @@ static struct siw_device *siw_device_create(struct net_device *netdev) sizeof(base_dev->iw_ifname)); /* Disable TCP port mapping */ - base_dev->iw_driver_flags = IW_F_NO_PORT_MAP, + base_dev->iw_driver_flags = IW_F_NO_PORT_MAP; sdev->attrs.max_qp = SIW_MAX_QP; sdev->attrs.max_qp_wr = SIW_MAX_QP_WR; diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 875d36d4b1c6..ddb2e66f9f13 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -199,26 +199,26 @@ void siw_qp_llp_write_space(struct sock *sk) static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size) { - irq_size = roundup_pow_of_two(irq_size); - orq_size = roundup_pow_of_two(orq_size); - - qp->attrs.irq_size = irq_size; - qp->attrs.orq_size = orq_size; - - qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe)); - if (!qp->irq) { - siw_dbg_qp(qp, "irq malloc for %d failed\n", irq_size); - qp->attrs.irq_size = 0; - return -ENOMEM; + if (irq_size) { + irq_size = roundup_pow_of_two(irq_size); + qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe)); + if (!qp->irq) { + qp->attrs.irq_size = 0; + return -ENOMEM; + } } - qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe)); - if (!qp->orq) { - siw_dbg_qp(qp, "orq malloc for %d failed\n", orq_size); - qp->attrs.orq_size = 0; - qp->attrs.irq_size = 0; - vfree(qp->irq); - return -ENOMEM; + if (orq_size) { + orq_size = roundup_pow_of_two(orq_size); + qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe)); + if (!qp->orq) { + qp->attrs.orq_size = 0; + qp->attrs.irq_size = 0; + vfree(qp->irq); + return -ENOMEM; + } } + qp->attrs.irq_size = irq_size; + qp->attrs.orq_size = orq_size; siw_dbg_qp(qp, "ORD %d, IRD %d\n", orq_size, irq_size); return 0; } @@ -288,13 +288,14 @@ int siw_qp_mpa_rts(struct siw_qp *qp, enum mpa_v2_ctrl ctrl) if (ctrl & MPA_V2_RDMA_WRITE_RTR) wqe->sqe.opcode = SIW_OP_WRITE; else if (ctrl & MPA_V2_RDMA_READ_RTR) { - struct siw_sqe *rreq; + struct siw_sqe *rreq = NULL; wqe->sqe.opcode = SIW_OP_READ; spin_lock(&qp->orq_lock); - rreq = orq_get_free(qp); + if (qp->attrs.orq_size) + rreq = orq_get_free(qp); if (rreq) { siw_read_to_orq(rreq, &wqe->sqe); qp->orq_put++; @@ -877,135 +878,88 @@ void siw_read_to_orq(struct siw_sqe *rreq, struct siw_sqe *sqe) rreq->num_sge = 1; } -/* - * Must be called with SQ locked. - * To avoid complete SQ starvation by constant inbound READ requests, - * the active IRQ will not be served after qp->irq_burst, if the - * SQ has pending work. 
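The siw_qp_readq_init() hunk above makes zero-sized inbound/outbound read queues legal: each queue is allocated only when its negotiated size is non-zero, the sizes are committed only after both allocations succeed, and a failed ORQ allocation unwinds the IRQ one. A small stand-alone sketch of that allocation order, with invented names (readq, roundup_pow2) and calloc standing in for vzalloc:

#include <stdlib.h>
#include <stdio.h>

struct rd_entry { unsigned long flags; };

struct readq {
        struct rd_entry *irq, *orq;
        unsigned int irq_size, orq_size;
};

static unsigned int roundup_pow2(unsigned int v)
{
        unsigned int r = 1;

        while (r < v)
                r <<= 1;
        return r;
}

/* allocate only what was negotiated; sizes are committed at the very end */
static int readq_init(struct readq *q, unsigned int irq_size,
                      unsigned int orq_size)
{
        if (irq_size) {
                irq_size = roundup_pow2(irq_size);
                q->irq = calloc(irq_size, sizeof(*q->irq));
                if (!q->irq)
                        return -1;
        }
        if (orq_size) {
                orq_size = roundup_pow2(orq_size);
                q->orq = calloc(orq_size, sizeof(*q->orq));
                if (!q->orq) {
                        free(q->irq);           /* undo the partial allocation */
                        q->irq = NULL;
                        return -1;
                }
        }
        q->irq_size = irq_size;
        q->orq_size = orq_size;
        return 0;
}

int main(void)
{
        struct readq q = { 0 };

        /* an ORD/IRD of 0 is now legal: nothing is allocated for that side */
        if (readq_init(&q, 0, 5))
                return 1;
        printf("irq_size=%u orq_size=%u\n", q.irq_size, q.orq_size);
        free(q.irq);
        free(q.orq);
        return 0;
}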
- */ -int siw_activate_tx(struct siw_qp *qp) +static int siw_activate_tx_from_sq(struct siw_qp *qp) { - struct siw_sqe *irqe, *sqe; + struct siw_sqe *sqe; struct siw_wqe *wqe = tx_wqe(qp); int rv = 1; - irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size]; - - if (irqe->flags & SIW_WQE_VALID) { - sqe = sq_get_next(qp); - - /* - * Avoid local WQE processing starvation in case - * of constant inbound READ request stream - */ - if (sqe && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) { - qp->irq_burst = 0; - goto skip_irq; - } - memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); - wqe->wr_status = SIW_WR_QUEUED; - - /* start READ RESPONSE */ - wqe->sqe.opcode = SIW_OP_READ_RESPONSE; - wqe->sqe.flags = 0; - if (irqe->num_sge) { - wqe->sqe.num_sge = 1; - wqe->sqe.sge[0].length = irqe->sge[0].length; - wqe->sqe.sge[0].laddr = irqe->sge[0].laddr; - wqe->sqe.sge[0].lkey = irqe->sge[0].lkey; - } else { - wqe->sqe.num_sge = 0; - } - - /* Retain original RREQ's message sequence number for - * potential error reporting cases. - */ - wqe->sqe.sge[1].length = irqe->sge[1].length; - - wqe->sqe.rkey = irqe->rkey; - wqe->sqe.raddr = irqe->raddr; + sqe = sq_get_next(qp); + if (!sqe) + return 0; - wqe->processed = 0; - qp->irq_get++; + memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); + wqe->wr_status = SIW_WR_QUEUED; - /* mark current IRQ entry free */ - smp_store_mb(irqe->flags, 0); + /* First copy SQE to kernel private memory */ + memcpy(&wqe->sqe, sqe, sizeof(*sqe)); + if (wqe->sqe.opcode >= SIW_NUM_OPCODES) { + rv = -EINVAL; goto out; } - sqe = sq_get_next(qp); - if (sqe) { -skip_irq: - memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); - wqe->wr_status = SIW_WR_QUEUED; - - /* First copy SQE to kernel private memory */ - memcpy(&wqe->sqe, sqe, sizeof(*sqe)); - - if (wqe->sqe.opcode >= SIW_NUM_OPCODES) { + if (wqe->sqe.flags & SIW_WQE_INLINE) { + if (wqe->sqe.opcode != SIW_OP_SEND && + wqe->sqe.opcode != SIW_OP_WRITE) { rv = -EINVAL; goto out; } - if (wqe->sqe.flags & SIW_WQE_INLINE) { - if (wqe->sqe.opcode != SIW_OP_SEND && - wqe->sqe.opcode != SIW_OP_WRITE) { - rv = -EINVAL; - goto out; - } - if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) { - rv = -EINVAL; - goto out; - } - wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1]; - wqe->sqe.sge[0].lkey = 0; - wqe->sqe.num_sge = 1; + if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) { + rv = -EINVAL; + goto out; } - if (wqe->sqe.flags & SIW_WQE_READ_FENCE) { - /* A READ cannot be fenced */ - if (unlikely(wqe->sqe.opcode == SIW_OP_READ || - wqe->sqe.opcode == - SIW_OP_READ_LOCAL_INV)) { - siw_dbg_qp(qp, "cannot fence read\n"); - rv = -EINVAL; - goto out; - } - spin_lock(&qp->orq_lock); + wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1]; + wqe->sqe.sge[0].lkey = 0; + wqe->sqe.num_sge = 1; + } + if (wqe->sqe.flags & SIW_WQE_READ_FENCE) { + /* A READ cannot be fenced */ + if (unlikely(wqe->sqe.opcode == SIW_OP_READ || + wqe->sqe.opcode == + SIW_OP_READ_LOCAL_INV)) { + siw_dbg_qp(qp, "cannot fence read\n"); + rv = -EINVAL; + goto out; + } + spin_lock(&qp->orq_lock); - if (!siw_orq_empty(qp)) { - qp->tx_ctx.orq_fence = 1; - rv = 0; - } - spin_unlock(&qp->orq_lock); + if (qp->attrs.orq_size && !siw_orq_empty(qp)) { + qp->tx_ctx.orq_fence = 1; + rv = 0; + } + spin_unlock(&qp->orq_lock); - } else if (wqe->sqe.opcode == SIW_OP_READ || - wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) { - struct siw_sqe *rreq; + } else if (wqe->sqe.opcode == SIW_OP_READ || + wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) { + struct siw_sqe *rreq; - wqe->sqe.num_sge = 1; + if 
(unlikely(!qp->attrs.orq_size)) { + /* We negotiated not to send READ req's */ + rv = -EINVAL; + goto out; + } + wqe->sqe.num_sge = 1; - spin_lock(&qp->orq_lock); + spin_lock(&qp->orq_lock); - rreq = orq_get_free(qp); - if (rreq) { - /* - * Make an immediate copy in ORQ to be ready - * to process loopback READ reply - */ - siw_read_to_orq(rreq, &wqe->sqe); - qp->orq_put++; - } else { - qp->tx_ctx.orq_fence = 1; - rv = 0; - } - spin_unlock(&qp->orq_lock); + rreq = orq_get_free(qp); + if (rreq) { + /* + * Make an immediate copy in ORQ to be ready + * to process loopback READ reply + */ + siw_read_to_orq(rreq, &wqe->sqe); + qp->orq_put++; + } else { + qp->tx_ctx.orq_fence = 1; + rv = 0; } - - /* Clear SQE, can be re-used by application */ - smp_store_mb(sqe->flags, 0); - qp->sq_get++; - } else { - rv = 0; + spin_unlock(&qp->orq_lock); } + + /* Clear SQE, can be re-used by application */ + smp_store_mb(sqe->flags, 0); + qp->sq_get++; out: if (unlikely(rv < 0)) { siw_dbg_qp(qp, "error %d\n", rv); @@ -1015,6 +969,65 @@ out: } /* + * Must be called with SQ locked. + * To avoid complete SQ starvation by constant inbound READ requests, + * the active IRQ will not be served after qp->irq_burst, if the + * SQ has pending work. + */ +int siw_activate_tx(struct siw_qp *qp) +{ + struct siw_sqe *irqe; + struct siw_wqe *wqe = tx_wqe(qp); + + if (!qp->attrs.irq_size) + return siw_activate_tx_from_sq(qp); + + irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size]; + + if (!(irqe->flags & SIW_WQE_VALID)) + return siw_activate_tx_from_sq(qp); + + /* + * Avoid local WQE processing starvation in case + * of constant inbound READ request stream + */ + if (sq_get_next(qp) && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) { + qp->irq_burst = 0; + return siw_activate_tx_from_sq(qp); + } + memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); + wqe->wr_status = SIW_WR_QUEUED; + + /* start READ RESPONSE */ + wqe->sqe.opcode = SIW_OP_READ_RESPONSE; + wqe->sqe.flags = 0; + if (irqe->num_sge) { + wqe->sqe.num_sge = 1; + wqe->sqe.sge[0].length = irqe->sge[0].length; + wqe->sqe.sge[0].laddr = irqe->sge[0].laddr; + wqe->sqe.sge[0].lkey = irqe->sge[0].lkey; + } else { + wqe->sqe.num_sge = 0; + } + + /* Retain original RREQ's message sequence number for + * potential error reporting cases. + */ + wqe->sqe.sge[1].length = irqe->sge[1].length; + + wqe->sqe.rkey = irqe->rkey; + wqe->sqe.raddr = irqe->raddr; + + wqe->processed = 0; + qp->irq_get++; + + /* mark current IRQ entry free */ + smp_store_mb(irqe->flags, 0); + + return 1; +} + +/* * Check if current CQ state qualifies for calling CQ completion * handler. Must be called with CQ lock held. 
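The siw_activate_tx() rework above moves the plain-SQ path into siw_activate_tx_from_sq() while keeping the existing fairness rule: the IRQ is served first, but once SIW_IRQ_MAXBURST_SQ_ACTIVE consecutive IRQ entries have been picked while the SQ also has pending work, one SQ entry is scheduled and the burst counter resets. The toy scheduler below only illustrates that burst rule; the queues, counters and names are invented and nothing in it is siw code.

#include <stdio.h>

#define IRQ_MAXBURST 4          /* analog of SIW_IRQ_MAXBURST_SQ_ACTIVE */

struct sched {
        int irq_pending;        /* inbound READ requests awaiting a response */
        int sq_pending;         /* application work requests */
        int irq_burst;          /* consecutive IRQ picks while the SQ is busy */
};

/* returns which queue is served next: 'I' for IRQ, 'S' for SQ, 0 if idle */
static char activate_tx(struct sched *s)
{
        if (s->irq_pending) {
                /* don't starve the SQ behind a constant inbound READ stream */
                if (s->sq_pending && ++s->irq_burst >= IRQ_MAXBURST) {
                        s->irq_burst = 0;
                        s->sq_pending--;
                        return 'S';
                }
                s->irq_pending--;
                return 'I';
        }
        if (s->sq_pending) {
                s->sq_pending--;
                return 'S';
        }
        return 0;
}

int main(void)
{
        struct sched s = { .irq_pending = 12, .sq_pending = 3 };
        char c;

        while ((c = activate_tx(&s)))
                putchar(c);
        putchar('\n');  /* prints IIISIIISIIISIII: an SQ slot every fourth pick
                         * while both queues have work, then the IRQ drains */
        return 0;
}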
*/ diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index 4bd1f1f84057..60116f20653c 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -680,6 +680,10 @@ static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx) } spin_lock_irqsave(&qp->sq_lock, flags); + if (unlikely(!qp->attrs.irq_size)) { + run_sq = 0; + goto error_irq; + } if (tx_work->wr_status == SIW_WR_IDLE) { /* * immediately schedule READ response w/o @@ -712,8 +716,9 @@ static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx) /* RRESP now valid as current TX wqe or placed into IRQ */ smp_store_mb(resp->flags, SIW_WQE_VALID); } else { - pr_warn("siw: [QP %u]: irq %d exceeded %d\n", qp_id(qp), - qp->irq_put % qp->attrs.irq_size, qp->attrs.irq_size); +error_irq: + pr_warn("siw: [QP %u]: IRQ exceeded or null, size %d\n", + qp_id(qp), qp->attrs.irq_size); siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP, RDMAP_ETYPE_REMOTE_OPERATION, @@ -740,6 +745,9 @@ static int siw_orqe_start_rx(struct siw_qp *qp) struct siw_sqe *orqe; struct siw_wqe *wqe = NULL; + if (unlikely(!qp->attrs.orq_size)) + return -EPROTO; + /* make sure ORQ indices are current */ smp_mb(); @@ -796,8 +804,8 @@ int siw_proc_rresp(struct siw_qp *qp) */ rv = siw_orqe_start_rx(qp); if (rv) { - pr_warn("siw: [QP %u]: ORQ empty at idx %d\n", - qp_id(qp), qp->orq_get % qp->attrs.orq_size); + pr_warn("siw: [QP %u]: ORQ empty, size %d\n", + qp_id(qp), qp->attrs.orq_size); goto error_term; } rv = siw_rresp_check_ntoh(srx, frx); @@ -1290,11 +1298,13 @@ static int siw_rdmap_complete(struct siw_qp *qp, int error) wc_status); siw_wqe_put_mem(wqe, SIW_OP_READ); - if (!error) + if (!error) { rv = siw_check_tx_fence(qp); - else - /* Disable current ORQ eleement */ - WRITE_ONCE(orq_get_current(qp)->flags, 0); + } else { + /* Disable current ORQ element */ + if (qp->attrs.orq_size) + WRITE_ONCE(orq_get_current(qp)->flags, 0); + } break; case RDMAP_RDMA_READ_REQ: diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index d19d8325588b..7989c4043db4 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -1107,8 +1107,8 @@ next_wqe: /* * RREQ may have already been completed by inbound RRESP! */ - if (tx_type == SIW_OP_READ || - tx_type == SIW_OP_READ_LOCAL_INV) { + if ((tx_type == SIW_OP_READ || + tx_type == SIW_OP_READ_LOCAL_INV) && qp->attrs.orq_size) { /* Cleanup pending entry in ORQ */ qp->orq_put--; qp->orq[qp->orq_put % qp->attrs.orq_size].flags = 0; diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 68fd053fc774..e389d44e5591 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -365,13 +365,23 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, if (rv) goto err_out; + num_sqe = attrs->cap.max_send_wr; + num_rqe = attrs->cap.max_recv_wr; + /* All queue indices are derived from modulo operations * on a free running 'get' (consumer) and 'put' (producer) * unsigned counter. Having queue sizes at power of two * avoids handling counter wrap around. 
*/ - num_sqe = roundup_pow_of_two(attrs->cap.max_send_wr); - num_rqe = roundup_pow_of_two(attrs->cap.max_recv_wr); + if (num_sqe) + num_sqe = roundup_pow_of_two(num_sqe); + else { + /* Zero sized SQ is not supported */ + rv = -EINVAL; + goto err_out; + } + if (num_rqe) + num_rqe = roundup_pow_of_two(num_rqe); if (udata) qp->sendq = vmalloc_user(num_sqe * sizeof(struct siw_sqe)); @@ -379,7 +389,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, qp->sendq = vzalloc(num_sqe * sizeof(struct siw_sqe)); if (qp->sendq == NULL) { - siw_dbg(base_dev, "SQ size %d alloc failed\n", num_sqe); rv = -ENOMEM; goto err_out_xa; } @@ -413,7 +422,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, qp->recvq = vzalloc(num_rqe * sizeof(struct siw_rqe)); if (qp->recvq == NULL) { - siw_dbg(base_dev, "RQ size %d alloc failed\n", num_rqe); rv = -ENOMEM; goto err_out_xa; } @@ -966,9 +974,9 @@ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr, unsigned long flags; int rv = 0; - if (qp->srq) { + if (qp->srq || qp->attrs.rq_size == 0) { *bad_wr = wr; - return -EOPNOTSUPP; /* what else from errno.h? */ + return -EINVAL; } if (!rdma_is_kernel_res(&qp->base_qp.res)) { siw_dbg_qp(qp, "no kernel post_recv for user mapped rq\n"); diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 3440dc48d02c..179ff1d068e5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -413,7 +413,6 @@ struct ipoib_dev_priv { u64 hca_caps; struct ipoib_ethtool_st ethtool; unsigned int max_send_sge; - bool sm_fullmember_sendonly_support; const struct net_device_ops *rn_ops; }; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index a6f413491321..e16b40c09f82 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -141,8 +141,6 @@ int ipoib_open(struct net_device *dev) set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); - priv->sm_fullmember_sendonly_support = false; - if (ipoib_ib_dev_open(dev)) { if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) return 0; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 86e4ed64e4e2..5b3154503bf4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -275,7 +275,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, memset(&av, 0, sizeof(av)); av.type = rdma_ah_find_type(priv->ca, priv->port); - rdma_ah_set_dlid(&av, be16_to_cpu(mcast->mcmember.mlid)), + rdma_ah_set_dlid(&av, be16_to_cpu(mcast->mcmember.mlid)); rdma_ah_set_port_num(&av, priv->port); rdma_ah_set_sl(&av, mcast->mcmember.sl); rdma_ah_set_static_rate(&av, mcast->mcmember.rate); @@ -334,15 +334,6 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work) return; } /* - * Check if can send sendonly MCG's with sendonly-fullmember join state. - * It done here after the successfully join to the broadcast group, - * because the broadcast group must always be joined first and is always - * re-joined if the SM changes substantially. - */ - priv->sm_fullmember_sendonly_support = - ib_sa_sendonly_fullmem_support(&ipoib_sa_client, - priv->ca, priv->port); - /* * Take rtnl_lock to avoid racing with ipoib_stop() and * turning the carrier back on while a device is being * removed. 
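The comment retained in siw_create_qp() above explains why the queue sizes are rounded up to powers of two (and a zero-sized SQ is now rejected outright instead of being fed to roundup_pow_of_two()): the producer and consumer positions are free-running unsigned counters reduced modulo the queue size, so counter wrap needs no special casing. A self-contained sketch of that indexing scheme with an invented ring type, started with a deliberately pre-wrapped counter to show the arithmetic staying correct; with a power-of-two size the modulo is equivalent to masking with size - 1.

#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 8             /* must be a power of two for the mask trick */

struct ring {
        uint32_t slot[RING_SIZE];
        uint32_t get;           /* free-running consumer counter */
        uint32_t put;           /* free-running producer counter */
};

static int ring_put(struct ring *r, uint32_t v)
{
        if (r->put - r->get == RING_SIZE)       /* full; difference is wrap-safe */
                return -1;
        r->slot[r->put % RING_SIZE] = v;        /* same as r->put & (RING_SIZE - 1) */
        r->put++;
        return 0;
}

static int ring_get(struct ring *r, uint32_t *v)
{
        if (r->put == r->get)                   /* empty */
                return -1;
        *v = r->slot[r->get % RING_SIZE];
        r->get++;
        return 0;
}

int main(void)
{
        struct ring r = { .get = UINT32_MAX - 2, .put = UINT32_MAX - 2 };
        uint32_t v;
        int i;

        /* push the counters across the 2^32 wrap: nothing special happens */
        for (i = 0; i < 6; i++)
                ring_put(&r, i);
        while (!ring_get(&r, &v))
                printf("%u ", v);
        printf("\n");                           /* prints 0 1 2 3 4 5 */
        return 0;
}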
However, ipoib_stop() will attempt to flush @@ -537,9 +528,7 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) * most closely emulates the behavior, from a user space * application perspective, of Ethernet multicast operation. */ - if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) && - priv->sm_fullmember_sendonly_support) - /* SM supports sendonly-fullmember, otherwise fallback to full-member */ + if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) rec.join_state = SENDONLY_FULLMEMBER_JOIN; } spin_unlock_irq(&priv->lock); diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 4792b9bf400f..8fcaa1136f2c 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -89,13 +89,20 @@ int iser_debug_level = 0; module_param_named(debug_level, iser_debug_level, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:disabled)"); +static int iscsi_iser_set(const char *val, const struct kernel_param *kp); +static const struct kernel_param_ops iscsi_iser_size_ops = { + .set = iscsi_iser_set, + .get = param_get_uint, +}; + static unsigned int iscsi_max_lun = 512; -module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO); -MODULE_PARM_DESC(max_lun, "Max LUNs to allow per session (default:512"); +module_param_cb(max_lun, &iscsi_iser_size_ops, &iscsi_max_lun, S_IRUGO); +MODULE_PARM_DESC(max_lun, "Max LUNs to allow per session, should > 0 (default:512)"); unsigned int iser_max_sectors = ISER_DEF_MAX_SECTORS; -module_param_named(max_sectors, iser_max_sectors, uint, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(max_sectors, "Max number of sectors in a single scsi command (default:1024"); +module_param_cb(max_sectors, &iscsi_iser_size_ops, &iser_max_sectors, + S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(max_sectors, "Max number of sectors in a single scsi command, should > 0 (default:1024)"); bool iser_always_reg = true; module_param_named(always_register, iser_always_reg, bool, S_IRUGO); @@ -110,6 +117,18 @@ int iser_pi_guard; module_param_named(pi_guard, iser_pi_guard, int, S_IRUGO); MODULE_PARM_DESC(pi_guard, "T10-PI guard_type [deprecated]"); +static int iscsi_iser_set(const char *val, const struct kernel_param *kp) +{ + int ret; + unsigned int n = 0; + + ret = kstrtouint(val, 10, &n); + if (ret != 0 || n == 0) + return -EINVAL; + + return param_set_uint(val, kp); +} + /* * iscsi_iser_recv() - Process a successful recv completion * @conn: iscsi connection @@ -571,13 +590,20 @@ iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) static inline unsigned int iser_dif_prot_caps(int prot_caps) { - return ((prot_caps & IB_PROT_T10DIF_TYPE_1) ? - SHOST_DIF_TYPE1_PROTECTION | SHOST_DIX_TYPE0_PROTECTION | - SHOST_DIX_TYPE1_PROTECTION : 0) | - ((prot_caps & IB_PROT_T10DIF_TYPE_2) ? - SHOST_DIF_TYPE2_PROTECTION | SHOST_DIX_TYPE2_PROTECTION : 0) | - ((prot_caps & IB_PROT_T10DIF_TYPE_3) ? 
- SHOST_DIF_TYPE3_PROTECTION | SHOST_DIX_TYPE3_PROTECTION : 0); + int ret = 0; + + if (prot_caps & IB_PROT_T10DIF_TYPE_1) + ret |= SHOST_DIF_TYPE1_PROTECTION | + SHOST_DIX_TYPE0_PROTECTION | + SHOST_DIX_TYPE1_PROTECTION; + if (prot_caps & IB_PROT_T10DIF_TYPE_2) + ret |= SHOST_DIF_TYPE2_PROTECTION | + SHOST_DIX_TYPE2_PROTECTION; + if (prot_caps & IB_PROT_T10DIF_TYPE_3) + ret |= SHOST_DIF_TYPE3_PROTECTION | + SHOST_DIX_TYPE3_PROTECTION; + + return ret; } /** @@ -1009,11 +1035,6 @@ static int __init iser_init(void) iser_dbg("Starting iSER datamover...\n"); - if (iscsi_max_lun < 1) { - iser_err("Invalid max_lun value of %u\n", iscsi_max_lun); - return -EINVAL; - } - memset(&ig, 0, sizeof(struct iser_global)); ig.desc_cache = kmem_cache_create("iser_descriptors", diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index d4e057fac219..afec40da9b58 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -169,7 +169,7 @@ iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_domain *domain) domain->sig.dif.ref_escape = true; if (sc->prot_flags & SCSI_PROT_REF_INCREMENT) domain->sig.dif.ref_remap = true; -}; +} static int iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs) @@ -390,4 +390,3 @@ err_reg: return err; } - diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 2bd18b006893..136f6c4492e0 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -685,7 +685,7 @@ static void iser_cleanup_handler(struct rdma_cm_id *cma_id, iser_disconnected_handler(cma_id); iser_free_ib_conn_res(iser_conn, destroy); complete(&iser_conn->ib_completion); -}; +} static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 2ba27221ea85..7305ed8976c2 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -71,7 +71,6 @@ static int isert_sg_tablesize_set(const char *val, const struct kernel_param *kp return param_set_int(val, kp); } - static inline bool isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd) { @@ -79,7 +78,6 @@ isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd) cmd->prot_op != TARGET_PROT_NORMAL); } - static void isert_qp_event_callback(struct ib_event *e, void *context) { @@ -232,8 +230,10 @@ isert_create_device_ib_res(struct isert_device *device) } /* Check signature cap */ - device->pi_capable = ib_dev->attrs.device_cap_flags & - IB_DEVICE_INTEGRITY_HANDOVER ? 
true : false; + if (ib_dev->attrs.device_cap_flags & IB_DEVICE_INTEGRITY_HANDOVER) + device->pi_capable = true; + else + device->pi_capable = false; return 0; } @@ -1993,7 +1993,7 @@ isert_set_dif_domain(struct se_cmd *se_cmd, struct ib_sig_domain *domain) if (se_cmd->prot_type == TARGET_DIF_TYPE1_PROT || se_cmd->prot_type == TARGET_DIF_TYPE2_PROT) domain->sig.dif.ref_remap = true; -}; +} static int isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs) diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c index 4933085a864a..cecf0f7cadf9 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c @@ -233,7 +233,7 @@ static void vema_get_class_port_info(struct opa_vnic_vema_port *port, port_info = (struct opa_class_port_info *)rsp_mad->data; memcpy(port_info, &port->class_port_info, sizeof(*port_info)); - port_info->base_version = OPA_MGMT_BASE_VERSION, + port_info->base_version = OPA_MGMT_BASE_VERSION; port_info->class_version = OPA_EMA_CLASS_VERSION; /* diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c index ba00f0de14ca..b6a0abf40589 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c @@ -408,6 +408,7 @@ int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess) "%s", str); if (err) { pr_err("kobject_init_and_add: %d\n", err); + kobject_put(&sess->kobj); return err; } err = sysfs_create_group(&sess->kobj, &rtrs_clt_sess_attr_group); @@ -419,6 +420,7 @@ int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess) &sess->kobj, "stats"); if (err) { pr_err("kobject_init_and_add: %d\n", err); + kobject_put(&sess->stats->kobj_stats); goto remove_group; } @@ -469,15 +471,12 @@ int rtrs_clt_create_sysfs_root_files(struct rtrs_clt *clt) return sysfs_create_group(&clt->dev.kobj, &rtrs_clt_attr_group); } -void rtrs_clt_destroy_sysfs_root_folders(struct rtrs_clt *clt) +void rtrs_clt_destroy_sysfs_root(struct rtrs_clt *clt) { + sysfs_remove_group(&clt->dev.kobj, &rtrs_clt_attr_group); + if (clt->kobj_paths) { kobject_del(clt->kobj_paths); kobject_put(clt->kobj_paths); } } - -void rtrs_clt_destroy_sysfs_root_files(struct rtrs_clt *clt) -{ - sysfs_remove_group(&clt->dev.kobj, &rtrs_clt_attr_group); -} diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 67f86c405a26..0a08b4b742a3 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -31,6 +31,8 @@ */ #define RTRS_RECONNECT_SEED 8 +#define FIRST_CONN 0x01 + MODULE_DESCRIPTION("RDMA Transport Client"); MODULE_LICENSE("GPL"); @@ -178,18 +180,18 @@ struct rtrs_clt_con *rtrs_permit_to_clt_con(struct rtrs_clt_sess *sess, } /** - * __rtrs_clt_change_state() - change the session state through session state + * rtrs_clt_change_state() - change the session state through session state * machine. * * @sess: client session to change the state of. * @new_state: state to change to. * - * returns true if successful, false if the requested state can not be set. + * returns true if sess's state is changed to new state, otherwise return false. * * Locks: * state_wq lock must be hold. 
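Several hunks in this series fix the same class of typo, a statement terminated with a comma instead of a semicolon (siw_main.c iw_driver_flags, ipoib_multicast.c rdma_ah_set_dlid(), opa_vnic_vema.c base_version). Such code compiles silently because the comma operator just evaluates and discards the next expression as an operand, and in all three spots the behaviour happened to be unchanged, which is why they survived review. A tiny demonstration, plain illustrative C unrelated to the drivers, of why it still builds and where the operator does change meaning:

#include <stdio.h>

int main(void)
{
        int flags, a = 0, b = 0;

        /*
         * Assignment binds tighter than the comma operator, so this is
         * (flags = 1), (a = 2); -- it compiles without complaint and here
         * happens to behave like two separate statements.
         */
        flags = 1,
        a = 2;

        /*
         * The same habit bites as soon as the comma expression is used for
         * its value: b gets 3 and the trailing ", 4" is evaluated and thrown
         * away (newer compilers at best emit a warning about it).
         */
        b = 3, 4;

        printf("flags=%d a=%d b=%d\n", flags, a, b);    /* flags=1 a=2 b=3 */
        return 0;
}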
*/ -static bool __rtrs_clt_change_state(struct rtrs_clt_sess *sess, +static bool rtrs_clt_change_state(struct rtrs_clt_sess *sess, enum rtrs_clt_state new_state) { enum rtrs_clt_state old_state; @@ -286,7 +288,7 @@ static bool rtrs_clt_change_state_from_to(struct rtrs_clt_sess *sess, spin_lock_irq(&sess->state_wq.lock); if (sess->state == old_state) - changed = __rtrs_clt_change_state(sess, new_state); + changed = rtrs_clt_change_state(sess, new_state); spin_unlock_irq(&sess->state_wq.lock); return changed; @@ -494,7 +496,7 @@ static void rtrs_clt_recv_done(struct rtrs_clt_con *con, struct ib_wc *wc) int err; struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); - WARN_ON(sess->flags != RTRS_MSG_NEW_RKEY_F); + WARN_ON((sess->flags & RTRS_MSG_NEW_RKEY_F) == 0); iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); err = rtrs_iu_post_recv(&con->c, iu); @@ -514,7 +516,7 @@ static void rtrs_clt_rkey_rsp_done(struct rtrs_clt_con *con, struct ib_wc *wc) u32 buf_id; int err; - WARN_ON(sess->flags != RTRS_MSG_NEW_RKEY_F); + WARN_ON((sess->flags & RTRS_MSG_NEW_RKEY_F) == 0); iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); @@ -621,12 +623,12 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) } else if (imm_type == RTRS_HB_MSG_IMM) { WARN_ON(con->c.cid); rtrs_send_hb_ack(&sess->s); - if (sess->flags == RTRS_MSG_NEW_RKEY_F) + if (sess->flags & RTRS_MSG_NEW_RKEY_F) return rtrs_clt_recv_done(con, wc); } else if (imm_type == RTRS_HB_ACK_IMM) { WARN_ON(con->c.cid); sess->s.hb_missed_cnt = 0; - if (sess->flags == RTRS_MSG_NEW_RKEY_F) + if (sess->flags & RTRS_MSG_NEW_RKEY_F) return rtrs_clt_recv_done(con, wc); } else { rtrs_wrn(con->c.sess, "Unknown IMM type %u\n", @@ -654,7 +656,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) WARN_ON(!(wc->wc_flags & IB_WC_WITH_INVALIDATE || wc->wc_flags & IB_WC_WITH_IMM)); WARN_ON(wc->wr_cqe->done != rtrs_clt_rdma_done); - if (sess->flags == RTRS_MSG_NEW_RKEY_F) { + if (sess->flags & RTRS_MSG_NEW_RKEY_F) { if (wc->wc_flags & IB_WC_WITH_INVALIDATE) return rtrs_clt_recv_done(con, wc); @@ -664,7 +666,6 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) case IB_WC_RDMA_WRITE: /* * post_send() RDMA write completions of IO reqs (read/write) - * and hb */ break; @@ -680,7 +681,7 @@ static int post_recv_io(struct rtrs_clt_con *con, size_t q_size) struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); for (i = 0; i < q_size; i++) { - if (sess->flags == RTRS_MSG_NEW_RKEY_F) { + if (sess->flags & RTRS_MSG_NEW_RKEY_F) { struct rtrs_iu *iu = &con->rsp_ius[i]; err = rtrs_iu_post_recv(&con->c, iu); @@ -1318,6 +1319,12 @@ out_err: static void free_permits(struct rtrs_clt *clt) { + if (clt->permits_map) { + size_t sz = clt->queue_depth; + + wait_event(clt->permits_wait, + find_first_bit(clt->permits_map, sz) >= sz); + } kfree(clt->permits_map); clt->permits_map = NULL; kfree(clt->permits); @@ -1353,21 +1360,14 @@ static bool rtrs_clt_change_state_get_old(struct rtrs_clt_sess *sess, bool changed; spin_lock_irq(&sess->state_wq.lock); - *old_state = sess->state; - changed = __rtrs_clt_change_state(sess, new_state); + if (old_state) + *old_state = sess->state; + changed = rtrs_clt_change_state(sess, new_state); spin_unlock_irq(&sess->state_wq.lock); return changed; } -static bool rtrs_clt_change_state(struct rtrs_clt_sess *sess, - enum rtrs_clt_state new_state) -{ - enum rtrs_clt_state old_state; - - return rtrs_clt_change_state_get_old(sess, new_state, &old_state); -} - static void rtrs_clt_hb_err_handler(struct rtrs_con *c) { 
struct rtrs_clt_con *con = container_of(c, typeof(*con), c); @@ -1511,7 +1511,7 @@ static void destroy_con(struct rtrs_clt_con *con) static int create_con_cq_qp(struct rtrs_clt_con *con) { struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); - u16 wr_queue_size; + u32 max_send_wr, max_recv_wr, cq_size; int err, cq_vector; struct rtrs_msg_rkey_rsp *rsp; @@ -1523,7 +1523,8 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) * + 2 for drain and heartbeat * in case qp gets into error state */ - wr_queue_size = SERVICE_CON_QUEUE_DEPTH * 3 + 2; + max_send_wr = SERVICE_CON_QUEUE_DEPTH * 2 + 2; + max_recv_wr = SERVICE_CON_QUEUE_DEPTH * 2 + 2; /* We must be the first here */ if (WARN_ON(sess->s.dev)) return -EINVAL; @@ -1555,25 +1556,29 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) /* Shared between connections */ sess->s.dev_ref++; - wr_queue_size = + max_send_wr = min_t(int, sess->s.dev->ib_dev->attrs.max_qp_wr, /* QD * (REQ + RSP + FR REGS or INVS) + drain */ sess->queue_depth * 3 + 1); + max_recv_wr = + min_t(int, sess->s.dev->ib_dev->attrs.max_qp_wr, + sess->queue_depth * 3 + 1); } /* alloc iu to recv new rkey reply when server reports flags set */ - if (sess->flags == RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) { - con->rsp_ius = rtrs_iu_alloc(wr_queue_size, sizeof(*rsp), + if (sess->flags & RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) { + con->rsp_ius = rtrs_iu_alloc(max_recv_wr, sizeof(*rsp), GFP_KERNEL, sess->s.dev->ib_dev, DMA_FROM_DEVICE, rtrs_clt_rdma_done); if (!con->rsp_ius) return -ENOMEM; - con->queue_size = wr_queue_size; + con->queue_size = max_recv_wr; } + cq_size = max_send_wr + max_recv_wr; cq_vector = con->cpu % sess->s.dev->ib_dev->num_comp_vectors; err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge, - cq_vector, wr_queue_size, wr_queue_size, - IB_POLL_SOFTIRQ); + cq_vector, cq_size, max_send_wr, + max_recv_wr, IB_POLL_SOFTIRQ); /* * In case of error we do not bother to clean previous allocations, * since destroy_con_cq_qp() must be called. @@ -1657,6 +1662,7 @@ static int rtrs_rdma_route_resolved(struct rtrs_clt_con *con) .cid_num = cpu_to_le16(sess->s.con_num), .recon_cnt = cpu_to_le16(sess->s.recon_cnt), }; + msg.first_conn = sess->for_new_clt ? FIRST_CONN : 0; uuid_copy(&msg.sess_uuid, &sess->s.uuid); uuid_copy(&msg.paths_uuid, &clt->paths_uuid); @@ -1742,6 +1748,8 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con, scnprintf(sess->hca_name, sizeof(sess->hca_name), sess->s.dev->ib_dev->name); sess->s.src_addr = con->c.cm_id->route.addr.src_addr; + /* set for_new_clt, to allow future reconnect on any path */ + sess->for_new_clt = 1; } return 0; @@ -1788,7 +1796,7 @@ static int rtrs_rdma_conn_rejected(struct rtrs_clt_con *con, static void rtrs_clt_close_conns(struct rtrs_clt_sess *sess, bool wait) { - if (rtrs_clt_change_state(sess, RTRS_CLT_CLOSING)) + if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_CLOSING, NULL)) queue_work(rtrs_wq, &sess->close_work); if (wait) flush_work(&sess->close_work); @@ -2174,7 +2182,7 @@ static void rtrs_clt_close_work(struct work_struct *work) cancel_delayed_work_sync(&sess->reconnect_dwork); rtrs_clt_stop_and_destroy_conns(sess); - rtrs_clt_change_state(sess, RTRS_CLT_CLOSED); + rtrs_clt_change_state_get_old(sess, RTRS_CLT_CLOSED, NULL); } static int init_conns(struct rtrs_clt_sess *sess) @@ -2226,7 +2234,7 @@ destroy: * doing rdma_resolve_addr(), switch to CONNECTION_ERR state * manually to keep reconnecting. 
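In the rtrs-clt hunks above, free_permits() absorbs the old wait_for_inflight_permits(): before the permit arrays are freed it waits on permits_wait until the permits bitmap is empty, i.e. every outstanding permit has been returned. A user-space analog of that teardown pattern, using a mutex/condvar pair where the kernel uses wait_event(); permit_pool and the helper names are invented for the sketch.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

struct permit_pool {
        pthread_mutex_t lock;
        pthread_cond_t idle;            /* signalled whenever a permit returns */
        unsigned long map;              /* bit set == permit in flight */
        unsigned int depth;
};

static struct permit_pool pool = {
        .lock = PTHREAD_MUTEX_INITIALIZER,
        .idle = PTHREAD_COND_INITIALIZER,
        .depth = 4,
};

static int get_permit(struct permit_pool *p)
{
        int i, bit = -1;

        pthread_mutex_lock(&p->lock);
        for (i = 0; i < (int)p->depth; i++)
                if (!(p->map & (1UL << i))) {
                        p->map |= 1UL << i;
                        bit = i;
                        break;
                }
        pthread_mutex_unlock(&p->lock);
        return bit;
}

static void put_permit(struct permit_pool *p, int bit)
{
        pthread_mutex_lock(&p->lock);
        p->map &= ~(1UL << bit);
        pthread_cond_broadcast(&p->idle);
        pthread_mutex_unlock(&p->lock);
}

/* like the reworked free_permits(): block until every permit has come back */
static void free_permits(struct permit_pool *p)
{
        pthread_mutex_lock(&p->lock);
        while (p->map)
                pthread_cond_wait(&p->idle, &p->lock);
        pthread_mutex_unlock(&p->lock);
        printf("all permits returned, backing memory may be freed\n");
}

static void *releaser(void *arg)
{
        usleep(100 * 1000);             /* pretend a request is still in flight */
        put_permit(&pool, (int)(long)arg);
        return NULL;
}

int main(void)
{
        pthread_t t[3];
        long bits[3];
        int i;

        for (i = 0; i < 3; i++)
                bits[i] = get_permit(&pool);    /* permits in flight before teardown */
        for (i = 0; i < 3; i++)
                pthread_create(&t[i], NULL, releaser, (void *)bits[i]);
        free_permits(&pool);                    /* returns only once all are put back */
        for (i = 0; i < 3; i++)
                pthread_join(t[i], NULL);
        return 0;
}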
*/ - rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING_ERR); + rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL); return err; } @@ -2243,7 +2251,7 @@ static void rtrs_clt_info_req_done(struct ib_cq *cq, struct ib_wc *wc) if (unlikely(wc->status != IB_WC_SUCCESS)) { rtrs_err(sess->clt, "Sess info request send failed: %s\n", ib_wc_status_msg(wc->status)); - rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING_ERR); + rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL); return; } @@ -2367,7 +2375,7 @@ static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) out: rtrs_clt_update_wc_stats(con); rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); - rtrs_clt_change_state(sess, state); + rtrs_clt_change_state_get_old(sess, state, NULL); } static int rtrs_send_sess_info(struct rtrs_clt_sess *sess) @@ -2423,7 +2431,6 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess) err = -ECONNRESET; else err = -ETIMEDOUT; - goto out; } out: @@ -2433,7 +2440,7 @@ out: rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1); if (unlikely(err)) /* If we've never taken async path because of malloc problems */ - rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING_ERR); + rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL); return err; } @@ -2490,7 +2497,7 @@ static void rtrs_clt_reconnect_work(struct work_struct *work) /* Stop everything */ rtrs_clt_stop_and_destroy_conns(sess); msleep(RTRS_RECONNECT_BACKOFF); - if (rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING)) { + if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING, NULL)) { err = init_sess(sess); if (err) goto reconnect_again; @@ -2499,7 +2506,7 @@ static void rtrs_clt_reconnect_work(struct work_struct *work) return; reconnect_again: - if (rtrs_clt_change_state(sess, RTRS_CLT_RECONNECTING)) { + if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_RECONNECTING, NULL)) { sess->stats->reconnects.fail_cnt++; delay_ms = clt->reconnect_delay_sec * 1000; queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, @@ -2565,11 +2572,8 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num, clt->dev.class = rtrs_clt_dev_class; clt->dev.release = rtrs_clt_dev_release; err = dev_set_name(&clt->dev, "%s", sessname); - if (err) { - free_percpu(clt->pcpu_path); - kfree(clt); - return ERR_PTR(err); - } + if (err) + goto err; /* * Suppress user space notification until * sysfs files are created @@ -2577,44 +2581,35 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num, dev_set_uevent_suppress(&clt->dev, true); err = device_register(&clt->dev); if (err) { - free_percpu(clt->pcpu_path); put_device(&clt->dev); - return ERR_PTR(err); + goto err; } clt->kobj_paths = kobject_create_and_add("paths", &clt->dev.kobj); if (!clt->kobj_paths) { - free_percpu(clt->pcpu_path); - device_unregister(&clt->dev); - return NULL; + err = -ENOMEM; + goto err_dev; } err = rtrs_clt_create_sysfs_root_files(clt); if (err) { - free_percpu(clt->pcpu_path); kobject_del(clt->kobj_paths); kobject_put(clt->kobj_paths); - device_unregister(&clt->dev); - return ERR_PTR(err); + goto err_dev; } dev_set_uevent_suppress(&clt->dev, false); kobject_uevent(&clt->dev.kobj, KOBJ_ADD); return clt; -} - -static void wait_for_inflight_permits(struct rtrs_clt *clt) -{ - if (clt->permits_map) { - size_t sz = clt->queue_depth; - - wait_event(clt->permits_wait, - find_first_bit(clt->permits_map, sz) >= sz); - } +err_dev: + device_unregister(&clt->dev); +err: + free_percpu(clt->pcpu_path); + kfree(clt); + return ERR_PTR(err); } static void 
free_clt(struct rtrs_clt *clt) { - wait_for_inflight_permits(clt); free_permits(clt); free_percpu(clt->pcpu_path); mutex_destroy(&clt->paths_ev_mutex); @@ -2672,6 +2667,8 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, err = PTR_ERR(sess); goto close_all_sess; } + if (!i) + sess->for_new_clt = 1; list_add_tail_rcu(&sess->s.entry, &clt->paths_list); err = init_sess(sess); @@ -2702,8 +2699,7 @@ close_all_sess: rtrs_clt_close_conns(sess, true); kobject_put(&sess->kobj); } - rtrs_clt_destroy_sysfs_root_files(clt); - rtrs_clt_destroy_sysfs_root_folders(clt); + rtrs_clt_destroy_sysfs_root(clt); free_clt(clt); out: @@ -2720,8 +2716,7 @@ void rtrs_clt_close(struct rtrs_clt *clt) struct rtrs_clt_sess *sess, *tmp; /* Firstly forbid sysfs access */ - rtrs_clt_destroy_sysfs_root_files(clt); - rtrs_clt_destroy_sysfs_root_folders(clt); + rtrs_clt_destroy_sysfs_root(clt); /* Now it is safe to iterate over all paths without locks */ list_for_each_entry_safe(sess, tmp, &clt->paths_list, s.entry) { diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h index b8dbd701b3cb..692bc83e1f09 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h @@ -143,6 +143,7 @@ struct rtrs_clt_sess { int max_send_sge; u32 flags; struct kobject kobj; + u8 for_new_clt; struct rtrs_clt_stats *stats; /* cache hca_port and hca_name to display in sysfs */ u8 hca_port; @@ -243,8 +244,7 @@ ssize_t rtrs_clt_reset_all_help(struct rtrs_clt_stats *stats, /* rtrs-clt-sysfs.c */ int rtrs_clt_create_sysfs_root_files(struct rtrs_clt *clt); -void rtrs_clt_destroy_sysfs_root_folders(struct rtrs_clt *clt); -void rtrs_clt_destroy_sysfs_root_files(struct rtrs_clt *clt); +void rtrs_clt_destroy_sysfs_root(struct rtrs_clt *clt); int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess); void rtrs_clt_destroy_sess_files(struct rtrs_clt_sess *sess, diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h index 3f2918671dbe..8caad0a2322b 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h @@ -188,7 +188,9 @@ struct rtrs_msg_conn_req { __le16 recon_cnt; uuid_t sess_uuid; uuid_t paths_uuid; - u8 reserved[12]; + u8 first_conn : 1; + u8 reserved_bits : 7; + u8 reserved[11]; }; /** @@ -303,8 +305,9 @@ int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe, struct ib_send_wr *head); int rtrs_cq_qp_create(struct rtrs_sess *rtrs_sess, struct rtrs_con *con, - u32 max_send_sge, int cq_vector, u16 cq_size, - u16 wr_queue_size, enum ib_poll_context poll_ctx); + u32 max_send_sge, int cq_vector, int cq_size, + u32 max_send_wr, u32 max_recv_wr, + enum ib_poll_context poll_ctx); void rtrs_cq_qp_destroy(struct rtrs_con *con); void rtrs_init_hb(struct rtrs_sess *sess, struct ib_cqe *cqe, diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index d2edff3b8f0d..126a96e75c62 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -51,6 +51,8 @@ static ssize_t rtrs_srv_disconnect_store(struct kobject *kobj, sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, str, sizeof(str)); rtrs_info(s, "disconnect for path %s requested\n", str); + /* first remove sysfs itself to avoid deadlock */ + sysfs_remove_file_self(&sess->kobj, &attr->attr); close_sess(sess); return count; @@ -181,6 +183,7 @@ static int rtrs_srv_create_once_sysfs_root_folders(struct rtrs_srv_sess *sess) err = 
-ENOMEM; pr_err("kobject_create_and_add(): %d\n", err); device_del(&srv->dev); + put_device(&srv->dev); goto unlock; } dev_set_uevent_suppress(&srv->dev, false); @@ -206,6 +209,7 @@ rtrs_srv_destroy_once_sysfs_root_folders(struct rtrs_srv_sess *sess) kobject_put(srv->kobj_paths); mutex_unlock(&srv->paths_mutex); device_del(&srv->dev); + put_device(&srv->dev); } else { mutex_unlock(&srv->paths_mutex); } @@ -234,6 +238,7 @@ static int rtrs_srv_create_stats_files(struct rtrs_srv_sess *sess) &sess->kobj, "stats"); if (err) { rtrs_err(s, "kobject_init_and_add(): %d\n", err); + kobject_put(&sess->stats->kobj_stats); return err; } err = sysfs_create_group(&sess->stats->kobj_stats, @@ -290,8 +295,8 @@ remove_group: sysfs_remove_group(&sess->kobj, &rtrs_srv_sess_attr_group); put_kobj: kobject_del(&sess->kobj); - kobject_put(&sess->kobj); destroy_root: + kobject_put(&sess->kobj); rtrs_srv_destroy_once_sysfs_root_folders(sess); return err; @@ -302,7 +307,7 @@ void rtrs_srv_destroy_sess_files(struct rtrs_srv_sess *sess) if (sess->kobj.state_in_sysfs) { kobject_del(&sess->stats->kobj_stats); kobject_put(&sess->stats->kobj_stats); - kobject_del(&sess->kobj); + sysfs_remove_group(&sess->kobj, &rtrs_srv_sess_attr_group); kobject_put(&sess->kobj); rtrs_srv_destroy_once_sysfs_root_folders(sess); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index c42fd470c4eb..d071809e3ed2 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -222,7 +222,8 @@ static int rdma_write_sg(struct rtrs_srv_op *id) dma_addr_t dma_addr = sess->dma_addr[id->msg_id]; struct rtrs_srv_mr *srv_mr; struct rtrs_srv *srv = sess->srv; - struct ib_send_wr inv_wr, imm_wr; + struct ib_send_wr inv_wr; + struct ib_rdma_wr imm_wr; struct ib_rdma_wr *wr = NULL; enum ib_send_flags flags; size_t sg_cnt; @@ -267,21 +268,22 @@ static int rdma_write_sg(struct rtrs_srv_op *id) WARN_ON_ONCE(rkey != wr->rkey); wr->wr.opcode = IB_WR_RDMA_WRITE; + wr->wr.wr_cqe = &io_comp_cqe; wr->wr.ex.imm_data = 0; wr->wr.send_flags = 0; if (need_inval && always_invalidate) { wr->wr.next = &rwr.wr; rwr.wr.next = &inv_wr; - inv_wr.next = &imm_wr; + inv_wr.next = &imm_wr.wr; } else if (always_invalidate) { wr->wr.next = &rwr.wr; - rwr.wr.next = &imm_wr; + rwr.wr.next = &imm_wr.wr; } else if (need_inval) { wr->wr.next = &inv_wr; - inv_wr.next = &imm_wr; + inv_wr.next = &imm_wr.wr; } else { - wr->wr.next = &imm_wr; + wr->wr.next = &imm_wr.wr; } /* * From time to time we have to post signaled sends, @@ -294,16 +296,18 @@ static int rdma_write_sg(struct rtrs_srv_op *id) inv_wr.sg_list = NULL; inv_wr.num_sge = 0; inv_wr.opcode = IB_WR_SEND_WITH_INV; + inv_wr.wr_cqe = &io_comp_cqe; inv_wr.send_flags = 0; inv_wr.ex.invalidate_rkey = rkey; } - imm_wr.next = NULL; + imm_wr.wr.next = NULL; if (always_invalidate) { struct rtrs_msg_rkey_rsp *msg; srv_mr = &sess->mrs[id->msg_id]; rwr.wr.opcode = IB_WR_REG_MR; + rwr.wr.wr_cqe = &local_reg_cqe; rwr.wr.num_sge = 0; rwr.mr = srv_mr->mr; rwr.wr.send_flags = 0; @@ -318,22 +322,22 @@ static int rdma_write_sg(struct rtrs_srv_op *id) list.addr = srv_mr->iu->dma_addr; list.length = sizeof(*msg); list.lkey = sess->s.dev->ib_pd->local_dma_lkey; - imm_wr.sg_list = &list; - imm_wr.num_sge = 1; - imm_wr.opcode = IB_WR_SEND_WITH_IMM; + imm_wr.wr.sg_list = &list; + imm_wr.wr.num_sge = 1; + imm_wr.wr.opcode = IB_WR_SEND_WITH_IMM; ib_dma_sync_single_for_device(sess->s.dev->ib_dev, srv_mr->iu->dma_addr, srv_mr->iu->size, DMA_TO_DEVICE); } else { - imm_wr.sg_list = 
NULL; - imm_wr.num_sge = 0; - imm_wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM; + imm_wr.wr.sg_list = NULL; + imm_wr.wr.num_sge = 0; + imm_wr.wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM; } - imm_wr.send_flags = flags; - imm_wr.ex.imm_data = cpu_to_be32(rtrs_to_io_rsp_imm(id->msg_id, + imm_wr.wr.send_flags = flags; + imm_wr.wr.ex.imm_data = cpu_to_be32(rtrs_to_io_rsp_imm(id->msg_id, 0, need_inval)); - imm_wr.wr_cqe = &io_comp_cqe; + imm_wr.wr.wr_cqe = &io_comp_cqe; ib_dma_sync_single_for_device(sess->s.dev->ib_dev, dma_addr, offset, DMA_BIDIRECTIONAL); @@ -360,7 +364,8 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, { struct rtrs_sess *s = con->c.sess; struct rtrs_srv_sess *sess = to_srv_sess(s); - struct ib_send_wr inv_wr, imm_wr, *wr = NULL; + struct ib_send_wr inv_wr, *wr = NULL; + struct ib_rdma_wr imm_wr; struct ib_reg_wr rwr; struct rtrs_srv *srv = sess->srv; struct rtrs_srv_mr *srv_mr; @@ -379,6 +384,7 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, if (need_inval) { if (likely(sg_cnt)) { + inv_wr.wr_cqe = &io_comp_cqe; inv_wr.sg_list = NULL; inv_wr.num_sge = 0; inv_wr.opcode = IB_WR_SEND_WITH_INV; @@ -396,15 +402,15 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, if (need_inval && always_invalidate) { wr = &inv_wr; inv_wr.next = &rwr.wr; - rwr.wr.next = &imm_wr; + rwr.wr.next = &imm_wr.wr; } else if (always_invalidate) { wr = &rwr.wr; - rwr.wr.next = &imm_wr; + rwr.wr.next = &imm_wr.wr; } else if (need_inval) { wr = &inv_wr; - inv_wr.next = &imm_wr; + inv_wr.next = &imm_wr.wr; } else { - wr = &imm_wr; + wr = &imm_wr.wr; } /* * From time to time we have to post signalled sends, @@ -413,14 +419,15 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, flags = (atomic_inc_return(&con->wr_cnt) % srv->queue_depth) ? 
0 : IB_SEND_SIGNALED; imm = rtrs_to_io_rsp_imm(id->msg_id, errno, need_inval); - imm_wr.next = NULL; + imm_wr.wr.next = NULL; if (always_invalidate) { struct ib_sge list; struct rtrs_msg_rkey_rsp *msg; srv_mr = &sess->mrs[id->msg_id]; - rwr.wr.next = &imm_wr; + rwr.wr.next = &imm_wr.wr; rwr.wr.opcode = IB_WR_REG_MR; + rwr.wr.wr_cqe = &local_reg_cqe; rwr.wr.num_sge = 0; rwr.wr.send_flags = 0; rwr.mr = srv_mr->mr; @@ -435,21 +442,21 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, list.addr = srv_mr->iu->dma_addr; list.length = sizeof(*msg); list.lkey = sess->s.dev->ib_pd->local_dma_lkey; - imm_wr.sg_list = &list; - imm_wr.num_sge = 1; - imm_wr.opcode = IB_WR_SEND_WITH_IMM; + imm_wr.wr.sg_list = &list; + imm_wr.wr.num_sge = 1; + imm_wr.wr.opcode = IB_WR_SEND_WITH_IMM; ib_dma_sync_single_for_device(sess->s.dev->ib_dev, srv_mr->iu->dma_addr, srv_mr->iu->size, DMA_TO_DEVICE); } else { - imm_wr.sg_list = NULL; - imm_wr.num_sge = 0; - imm_wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM; + imm_wr.wr.sg_list = NULL; + imm_wr.wr.num_sge = 0; + imm_wr.wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM; } - imm_wr.send_flags = flags; - imm_wr.wr_cqe = &io_comp_cqe; + imm_wr.wr.send_flags = flags; + imm_wr.wr.wr_cqe = &io_comp_cqe; - imm_wr.ex.imm_data = cpu_to_be32(imm); + imm_wr.wr.ex.imm_data = cpu_to_be32(imm); err = ib_post_send(id->con->c.qp, wr, NULL); if (unlikely(err)) @@ -651,7 +658,7 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess) if (!srv_mr->iu) { err = -ENOMEM; rtrs_err(ss, "rtrs_iu_alloc(), err: %d\n", err); - goto free_iu; + goto dereg_mr; } } /* Eventually dma addr for each chunk can be cached */ @@ -667,7 +674,6 @@ err: srv_mr = &sess->mrs[mri]; sgt = &srv_mr->sgt; mr = srv_mr->mr; -free_iu: rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1); dereg_mr: ib_dereg_mr(mr); @@ -814,7 +820,7 @@ static int process_info_req(struct rtrs_srv_con *con, rwr[mri].wr.opcode = IB_WR_REG_MR; rwr[mri].wr.wr_cqe = &local_reg_cqe; rwr[mri].wr.num_sge = 0; - rwr[mri].wr.send_flags = mri ? 0 : IB_SEND_SIGNALED; + rwr[mri].wr.send_flags = 0; rwr[mri].mr = mr; rwr[mri].key = mr->rkey; rwr[mri].access = (IB_ACCESS_LOCAL_WRITE | @@ -1238,7 +1244,6 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc) case IB_WC_SEND: /* * post_send() RDMA write completions of IO reqs (read/write) - * and hb */ atomic_add(srv->queue_depth, &con->sq_wr_avail); @@ -1328,7 +1333,8 @@ static void free_srv(struct rtrs_srv *srv) } static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, - const uuid_t *paths_uuid) + const uuid_t *paths_uuid, + bool first_conn) { struct rtrs_srv *srv; int i; @@ -1341,13 +1347,18 @@ static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, return srv; } } + mutex_unlock(&ctx->srv_mutex); + /* + * If this request is not the first connection request from the + * client for this session then fail and return error. 
+ */ + if (!first_conn) + return ERR_PTR(-ENXIO); /* need to allocate a new srv */ srv = kzalloc(sizeof(*srv), GFP_KERNEL); - if (!srv) { - mutex_unlock(&ctx->srv_mutex); - return NULL; - } + if (!srv) + return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&srv->paths_list); mutex_init(&srv->paths_mutex); @@ -1357,8 +1368,6 @@ static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, srv->ctx = ctx; device_initialize(&srv->dev); srv->dev.release = rtrs_srv_dev_release; - list_add(&srv->ctx_list, &ctx->srv_list); - mutex_unlock(&ctx->srv_mutex); srv->chunks = kcalloc(srv->queue_depth, sizeof(*srv->chunks), GFP_KERNEL); @@ -1371,6 +1380,9 @@ static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, goto err_free_chunks; } refcount_set(&srv->refcount, 1); + mutex_lock(&ctx->srv_mutex); + list_add(&srv->ctx_list, &ctx->srv_list); + mutex_unlock(&ctx->srv_mutex); return srv; @@ -1381,7 +1393,7 @@ err_free_chunks: err_free_srv: kfree(srv); - return NULL; + return ERR_PTR(-ENOMEM); } static void put_srv(struct rtrs_srv *srv) @@ -1461,10 +1473,12 @@ static bool __is_path_w_addr_exists(struct rtrs_srv *srv, static void free_sess(struct rtrs_srv_sess *sess) { - if (sess->kobj.state_in_sysfs) + if (sess->kobj.state_in_sysfs) { + kobject_del(&sess->kobj); kobject_put(&sess->kobj); - else + } else { kfree(sess); + } } static void rtrs_srv_close_work(struct work_struct *work) @@ -1586,7 +1600,7 @@ static int create_con(struct rtrs_srv_sess *sess, struct rtrs_sess *s = &sess->s; struct rtrs_srv_con *con; - u16 cq_size, wr_queue_size; + u32 cq_size, wr_queue_size; int err, cq_vector; con = kzalloc(sizeof(*con), GFP_KERNEL); @@ -1600,7 +1614,7 @@ static int create_con(struct rtrs_srv_sess *sess, con->c.cm_id = cm_id; con->c.sess = &sess->s; con->c.cid = cid; - atomic_set(&con->wr_cnt, 0); + atomic_set(&con->wr_cnt, 1); if (con->c.cid == 0) { /* @@ -1630,7 +1644,8 @@ static int create_con(struct rtrs_srv_sess *sess, /* TODO: SOFTIRQ can be faster, but be careful with softirq context */ err = rtrs_cq_qp_create(&sess->s, &con->c, 1, cq_vector, cq_size, - wr_queue_size, IB_POLL_WORKQUEUE); + wr_queue_size, wr_queue_size, + IB_POLL_WORKQUEUE); if (err) { rtrs_err(s, "rtrs_cq_qp_create(), err: %d\n", err); goto free_con; @@ -1781,13 +1796,9 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, goto reject_w_econnreset; } recon_cnt = le16_to_cpu(msg->recon_cnt); - srv = get_or_create_srv(ctx, &msg->paths_uuid); - /* - * "refcount == 0" happens if a previous thread calls get_or_create_srv - * allocate srv, but chunks of srv are not allocated yet. 
- */ - if (!srv || refcount_read(&srv->refcount) == 0) { - err = -ENOMEM; + srv = get_or_create_srv(ctx, &msg->paths_uuid, msg->first_conn); + if (IS_ERR(srv)) { + err = PTR_ERR(srv); goto reject_w_err; } mutex_lock(&srv->paths_mutex); @@ -1862,8 +1873,8 @@ reject_w_econnreset: return rtrs_rdma_do_reject(cm_id, -ECONNRESET); close_and_return_err: - close_sess(sess); mutex_unlock(&srv->paths_mutex); + close_sess(sess); return err; } diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index 2e3a849e0a77..d13aff0aa816 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -182,16 +182,16 @@ int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe, u32 imm_data, enum ib_send_flags flags, struct ib_send_wr *head) { - struct ib_send_wr wr; + struct ib_rdma_wr wr; - wr = (struct ib_send_wr) { - .wr_cqe = cqe, - .send_flags = flags, - .opcode = IB_WR_RDMA_WRITE_WITH_IMM, - .ex.imm_data = cpu_to_be32(imm_data), + wr = (struct ib_rdma_wr) { + .wr.wr_cqe = cqe, + .wr.send_flags = flags, + .wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM, + .wr.ex.imm_data = cpu_to_be32(imm_data), }; - return rtrs_post_send(con->qp, head, &wr); + return rtrs_post_send(con->qp, head, &wr.wr); } EXPORT_SYMBOL_GPL(rtrs_post_rdma_write_imm_empty); @@ -231,14 +231,14 @@ static int create_cq(struct rtrs_con *con, int cq_vector, u16 cq_size, } static int create_qp(struct rtrs_con *con, struct ib_pd *pd, - u16 wr_queue_size, u32 max_sge) + u32 max_send_wr, u32 max_recv_wr, u32 max_sge) { struct ib_qp_init_attr init_attr = {NULL}; struct rdma_cm_id *cm_id = con->cm_id; int ret; - init_attr.cap.max_send_wr = wr_queue_size; - init_attr.cap.max_recv_wr = wr_queue_size; + init_attr.cap.max_send_wr = max_send_wr; + init_attr.cap.max_recv_wr = max_recv_wr; init_attr.cap.max_recv_sge = 1; init_attr.event_handler = qp_event_handler; init_attr.qp_context = con; @@ -260,8 +260,9 @@ static int create_qp(struct rtrs_con *con, struct ib_pd *pd, } int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con, - u32 max_send_sge, int cq_vector, u16 cq_size, - u16 wr_queue_size, enum ib_poll_context poll_ctx) + u32 max_send_sge, int cq_vector, int cq_size, + u32 max_send_wr, u32 max_recv_wr, + enum ib_poll_context poll_ctx) { int err; @@ -269,7 +270,8 @@ int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con, if (err) return err; - err = create_qp(con, sess->dev->ib_pd, wr_queue_size, max_send_sge); + err = create_qp(con, sess->dev->ib_pd, max_send_wr, max_recv_wr, + max_send_sge); if (err) { ib_free_cq(con->cq); con->cq = NULL; @@ -308,7 +310,7 @@ void rtrs_send_hb_ack(struct rtrs_sess *sess) imm = rtrs_to_imm(RTRS_HB_ACK_IMM, 0); err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm, - IB_SEND_SIGNALED, NULL); + 0, NULL); if (err) { sess->hb_err_handler(usr_con); return; @@ -337,7 +339,7 @@ static void hb_work(struct work_struct *work) } imm = rtrs_to_imm(RTRS_HB_MSG_IMM, 0); err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm, - IB_SEND_SIGNALED, NULL); + 0, NULL); if (err) { sess->hb_err_handler(usr_con); return; diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 5492b66a8153..31f8aa2c40ed 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3628,7 +3628,7 @@ static ssize_t srp_create_target(struct device *dev, struct srp_rdma_ch *ch; struct srp_device *srp_dev = host->srp_dev; struct ib_device *ibdev = srp_dev->dev; - int ret, node_idx, 
node, cpu, i; + int ret, i, ch_idx; unsigned int max_sectors_per_mr, mr_per_cmd = 0; bool multich = false; uint32_t max_iu_len; @@ -3753,81 +3753,61 @@ static ssize_t srp_create_target(struct device *dev, goto out; ret = -ENOMEM; - if (target->ch_count == 0) + if (target->ch_count == 0) { target->ch_count = - max_t(unsigned int, num_online_nodes(), - min(ch_count ?: - min(4 * num_online_nodes(), - ibdev->num_comp_vectors), - num_online_cpus())); + min(ch_count ?: + max(4 * num_online_nodes(), + ibdev->num_comp_vectors), + num_online_cpus()); + } + target->ch = kcalloc(target->ch_count, sizeof(*target->ch), GFP_KERNEL); if (!target->ch) goto out; - node_idx = 0; - for_each_online_node(node) { - const int ch_start = (node_idx * target->ch_count / - num_online_nodes()); - const int ch_end = ((node_idx + 1) * target->ch_count / - num_online_nodes()); - const int cv_start = node_idx * ibdev->num_comp_vectors / - num_online_nodes(); - const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors / - num_online_nodes(); - int cpu_idx = 0; - - for_each_online_cpu(cpu) { - if (cpu_to_node(cpu) != node) - continue; - if (ch_start + cpu_idx >= ch_end) - continue; - ch = &target->ch[ch_start + cpu_idx]; - ch->target = target; - ch->comp_vector = cv_start == cv_end ? cv_start : - cv_start + cpu_idx % (cv_end - cv_start); - spin_lock_init(&ch->lock); - INIT_LIST_HEAD(&ch->free_tx); - ret = srp_new_cm_id(ch); - if (ret) - goto err_disconnect; + for (ch_idx = 0; ch_idx < target->ch_count; ++ch_idx) { + ch = &target->ch[ch_idx]; + ch->target = target; + ch->comp_vector = ch_idx % ibdev->num_comp_vectors; + spin_lock_init(&ch->lock); + INIT_LIST_HEAD(&ch->free_tx); + ret = srp_new_cm_id(ch); + if (ret) + goto err_disconnect; - ret = srp_create_ch_ib(ch); - if (ret) - goto err_disconnect; + ret = srp_create_ch_ib(ch); + if (ret) + goto err_disconnect; - ret = srp_alloc_req_data(ch); - if (ret) - goto err_disconnect; + ret = srp_alloc_req_data(ch); + if (ret) + goto err_disconnect; - ret = srp_connect_ch(ch, max_iu_len, multich); - if (ret) { - char dst[64]; - - if (target->using_rdma_cm) - snprintf(dst, sizeof(dst), "%pIS", - &target->rdma_cm.dst); - else - snprintf(dst, sizeof(dst), "%pI6", - target->ib_cm.orig_dgid.raw); - shost_printk(KERN_ERR, target->scsi_host, - PFX "Connection %d/%d to %s failed\n", - ch_start + cpu_idx, - target->ch_count, dst); - if (node_idx == 0 && cpu_idx == 0) { - goto free_ch; - } else { - srp_free_ch_ib(target, ch); - srp_free_req_data(target, ch); - target->ch_count = ch - target->ch; - goto connected; - } - } + ret = srp_connect_ch(ch, max_iu_len, multich); + if (ret) { + char dst[64]; - multich = true; - cpu_idx++; + if (target->using_rdma_cm) + snprintf(dst, sizeof(dst), "%pIS", + &target->rdma_cm.dst); + else + snprintf(dst, sizeof(dst), "%pI6", + target->ib_cm.orig_dgid.raw); + shost_printk(KERN_ERR, target->scsi_host, + PFX "Connection %d/%d to %s failed\n", + ch_idx, + target->ch_count, dst); + if (ch_idx == 0) { + goto free_ch; + } else { + srp_free_ch_ib(target, ch); + srp_free_req_data(target, ch); + target->ch_count = ch - target->ch; + goto connected; + } } - node_idx++; + multich = true; } connected: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 9eb51f06d3ae..50af84e76fb6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -56,6 +56,7 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, mkey->size = MLX5_GET64(mkc, mkc, len); mkey->key |= 
mlx5_idx_to_mkey(mkey_index); mkey->pd = MLX5_GET(mkc, mkc, pd); + init_waitqueue_head(&mkey->wait); mlx5_core_dbg(dev, "out 0x%x, mkey 0x%x\n", mkey_index, mkey->key); return 0; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6ea8d67e3cb8..53b89631a1d9 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -307,13 +307,6 @@ struct mlx5_cmd { struct mlx5_cmd_stats *stats; }; -struct mlx5_port_caps { - int gid_table_len; - int pkey_table_len; - u8 ext_port_cap; - bool has_smi; -}; - struct mlx5_cmd_mailbox { void *buf; dma_addr_t dma; @@ -375,6 +368,8 @@ struct mlx5_core_mkey { u32 key; u32 pd; u32 type; + struct wait_queue_head wait; + refcount_t usecount; }; #define MLX5_24BIT_MASK ((1 << 24) - 1) @@ -713,7 +708,6 @@ struct mlx5_core_dev { u8 rev_id; char board_id[MLX5_BOARD_ID_LEN]; struct mlx5_cmd cmd; - struct mlx5_port_caps port_caps[MLX5_MAX_PORTS]; struct { u32 hca_cur[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; u32 hca_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6f0b866fb495..df5d91c8b2d4 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1661,7 +1661,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 sf_set_partition[0x1]; u8 reserved_at_682[0x1]; u8 log_max_sf[0x5]; - u8 reserved_at_688[0x8]; + u8 apu[0x1]; + u8 reserved_at_689[0x7]; u8 log_min_sf_size[0x8]; u8 max_num_sf_partitions[0x8]; @@ -3868,7 +3869,7 @@ struct mlx5_ifc_cqc_bits { u8 status[0x4]; u8 reserved_at_4[0x2]; u8 dbr_umem_valid[0x1]; - u8 reserved_at_7[0x1]; + u8 apu_thread_cq[0x1]; u8 cqe_sz[0x3]; u8 cc[0x1]; u8 reserved_at_c[0x1]; diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 693285e76f13..4c52c2fd22a1 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -547,10 +547,6 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client, void *context), void *context, struct ib_sa_query **sa_query); -bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client, - struct ib_device *device, - u8 port_num); - static inline bool sa_path_is_roce(struct sa_path_rec *rec) { return ((rec->rec_type == SA_PATH_REC_TYPE_ROCE_V1) || diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 7752211c9638..676c57f5ca80 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2007 Cisco Systems. All rights reserved. + * Copyright (c) 2020 Intel Corporation. All rights reserved. */ #ifndef IB_UMEM_H @@ -13,6 +14,7 @@ struct ib_ucontext; struct ib_umem_odp; +struct dma_buf_attach_ops; struct ib_umem { struct ib_device *ibdev; @@ -22,12 +24,29 @@ struct ib_umem { unsigned long address; u32 writable : 1; u32 is_odp : 1; + u32 is_dmabuf : 1; struct work_struct work; struct sg_table sg_head; int nmap; unsigned int sg_nents; }; +struct ib_umem_dmabuf { + struct ib_umem umem; + struct dma_buf_attachment *attach; + struct sg_table *sgt; + struct scatterlist *first_sg; + struct scatterlist *last_sg; + unsigned long first_sg_offset; + unsigned long last_sg_trim; + void *private; +}; + +static inline struct ib_umem_dmabuf *to_ib_umem_dmabuf(struct ib_umem *umem) +{ + return container_of(umem, struct ib_umem_dmabuf, umem); +} + /* Returns the offset of the umem start relative to the first page. 
*/ static inline int ib_umem_offset(struct ib_umem *umem) { @@ -86,6 +105,7 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, unsigned long pgsz_bitmap, unsigned long virt); + /** * ib_umem_find_best_pgoff - Find best HW page size * @@ -116,6 +136,14 @@ static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem, dma_addr & pgoff_bitmask); } +struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device, + unsigned long offset, size_t size, + int fd, int access, + const struct dma_buf_attach_ops *ops); +int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf); +void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf); +void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf); + #else /* CONFIG_INFINIBAND_USER_MEM */ #include <linux/err.h> @@ -124,12 +152,12 @@ static inline struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, size_t size, int access) { - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); } static inline void ib_umem_release(struct ib_umem *umem) { } static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, size_t length) { - return -EINVAL; + return -EOPNOTSUPP; } static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, unsigned long pgsz_bitmap, @@ -143,7 +171,21 @@ static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem, { return 0; } +static inline +struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device, + unsigned long offset, + size_t size, int fd, + int access, + struct dma_buf_attach_ops *ops) +{ + return ERR_PTR(-EOPNOTSUPP); +} +static inline int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) +{ + return -EOPNOTSUPP; +} +static inline void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf) { } +static inline void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf) { } #endif /* CONFIG_INFINIBAND_USER_MEM */ - #endif /* IB_UMEM_H */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9fed65bf9279..ca28fca5736b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2,7 +2,7 @@ /* * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2004 Infinicon Corporation. All rights reserved. - * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004, 2020 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 
@@ -2434,6 +2434,10 @@ struct ib_device_ops { struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_udata *udata); + struct ib_mr *(*reg_user_mr_dmabuf)(struct ib_pd *pd, u64 offset, + u64 length, u64 virt_addr, int fd, + int mr_access_flags, + struct ib_udata *udata); struct ib_mr *(*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, @@ -4670,4 +4674,7 @@ static inline u32 rdma_calc_flow_label(u32 lqpn, u32 rqpn) return (u32)(v & IB_GRH_FLOWLABEL_MASK); } + +const struct ib_port_immutable* +ib_port_immutable_read(struct ib_device *dev, unsigned int port); #endif /* IB_VERBS_H */ diff --git a/include/rdma/rdma_counter.h b/include/rdma/rdma_counter.h index eb99856e8b30..e75cf9742e04 100644 --- a/include/rdma/rdma_counter.h +++ b/include/rdma/rdma_counter.h @@ -46,7 +46,8 @@ struct rdma_counter { void rdma_counter_init(struct ib_device *dev); void rdma_counter_release(struct ib_device *dev); int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port, - bool on, enum rdma_nl_counter_mask mask); + enum rdma_nl_counter_mask mask, + struct netlink_ext_ack *extack); int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port); int rdma_counter_unbind_qp(struct ib_qp *qp, bool force); diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h index 7968a1845355..dafc7ebe545b 100644 --- a/include/uapi/rdma/ib_user_ioctl_cmds.h +++ b/include/uapi/rdma/ib_user_ioctl_cmds.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2020, Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -251,6 +252,7 @@ enum uverbs_methods_mr { UVERBS_METHOD_MR_DESTROY, UVERBS_METHOD_ADVISE_MR, UVERBS_METHOD_QUERY_MR, + UVERBS_METHOD_REG_DMABUF_MR, }; enum uverbs_attrs_mr_destroy_ids { @@ -272,6 +274,18 @@ enum uverbs_attrs_query_mr_cmd_attr_ids { UVERBS_ATTR_QUERY_MR_RESP_IOVA, }; +enum uverbs_attrs_reg_dmabuf_mr_cmd_attr_ids { + UVERBS_ATTR_REG_DMABUF_MR_HANDLE, + UVERBS_ATTR_REG_DMABUF_MR_PD_HANDLE, + UVERBS_ATTR_REG_DMABUF_MR_OFFSET, + UVERBS_ATTR_REG_DMABUF_MR_LENGTH, + UVERBS_ATTR_REG_DMABUF_MR_IOVA, + UVERBS_ATTR_REG_DMABUF_MR_FD, + UVERBS_ATTR_REG_DMABUF_MR_ACCESS_FLAGS, + UVERBS_ATTR_REG_DMABUF_MR_RESP_LKEY, + UVERBS_ATTR_REG_DMABUF_MR_RESP_RKEY, +}; + enum uverbs_attrs_create_counters_cmd_attr_ids { UVERBS_ATTR_CREATE_COUNTERS_HANDLE, }; diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c index 71c960dcd8a4..652254754b4c 100644 --- a/tools/testing/scatterlist/main.c +++ b/tools/testing/scatterlist/main.c @@ -55,7 +55,6 @@ int main(void) struct test *test, tests[] = { { -EINVAL, 1, pfn(0), NULL, PAGE_SIZE, 0, 1 }, { 0, 1, pfn(0), NULL, PAGE_SIZE, PAGE_SIZE + 1, 1 }, - { 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax + 1, 1 }, { 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax, 1 }, { 0, 1, pfn(0), NULL, 1, sgmax, 1 }, { 0, 2, pfn(0, 1), NULL, 2 * PAGE_SIZE, sgmax, 1 }, |
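
The ib_umem.h and ib_verbs.h hunks above introduce a dma-buf backed umem (ib_umem_dmabuf_get(), ib_umem_dmabuf_map_pages(), ib_umem_dmabuf_unmap_pages(), ib_umem_dmabuf_release()) together with a matching reg_user_mr_dmabuf device operation. The following is only a rough sketch of how a provider driver might glue the two together; it is not taken from this series, every foo_* name is hypothetical, and both the dma-resv locking around the map call and the use of attach->importer_priv are assumptions based on the dynamic dma-buf attachment API rather than on code shown here.

/*
 * Hypothetical provider glue for the new reg_user_mr_dmabuf op.
 * Only ib_umem_dmabuf_get()/_map_pages()/_unmap_pages()/_release() and
 * struct dma_buf_attach_ops come from the interfaces added above; all
 * foo_* identifiers are made up for this sketch.
 */
#include <linux/dma-buf.h>
#include <linux/dma-resv.h>
#include <linux/err.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_verbs.h>

/* Hypothetical driver-specific helper that builds the HW MR over the umem. */
struct ib_mr *foo_create_mr(struct ib_pd *pd, struct ib_umem *umem,
			    u64 iova, int access);

static void foo_mr_dmabuf_move_notify(struct dma_buf_attachment *attach)
{
	/*
	 * Assumption: the core helper stores the umem_dmabuf as the
	 * attachment's importer_priv when it creates the attachment.
	 */
	struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv;

	/*
	 * The exporter is moving the buffer: drop the current mapping here
	 * and re-map/re-register before the next access (driver specific).
	 */
	ib_umem_dmabuf_unmap_pages(umem_dmabuf);
}

static const struct dma_buf_attach_ops foo_dmabuf_attach_ops = {
	.allow_peer2peer = true,
	.move_notify = foo_mr_dmabuf_move_notify,
};

static struct ib_mr *foo_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
					    u64 length, u64 virt_addr, int fd,
					    int access, struct ib_udata *udata)
{
	struct ib_umem_dmabuf *umem_dmabuf;
	int err;

	umem_dmabuf = ib_umem_dmabuf_get(pd->device, offset, length, fd,
					 access, &foo_dmabuf_attach_ops);
	if (IS_ERR(umem_dmabuf))
		return ERR_CAST(umem_dmabuf);

	/* Assumption: map_pages expects the dma-buf reservation lock held. */
	dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL);
	err = ib_umem_dmabuf_map_pages(umem_dmabuf);
	dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
	if (err) {
		ib_umem_dmabuf_release(umem_dmabuf);
		return ERR_PTR(err);
	}

	/* Build the HW MR over umem_dmabuf->umem as for a normal user MR. */
	return foo_create_mr(pd, &umem_dmabuf->umem, virt_addr, access);
}

The move_notify hook is what lets the exporter migrate the buffer after registration; a real driver would also re-map the pages and refresh its translation entries before servicing further I/O, which is the part the sketch leaves driver specific.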
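
On the user side, the new UVERBS_METHOD_REG_DMABUF_MR method and its UVERBS_ATTR_REG_DMABUF_MR_* attributes (offset, length, iova, fd, access flags, lkey/rkey response) line up with the rdma-core verb ibv_reg_dmabuf_mr(); that userspace counterpart is assumed here, since it is not part of this kernel diff. A minimal usage sketch, with the dma-buf file descriptor assumed to come from some exporting device (for example a GPU allocator):

/*
 * Userspace sketch, assuming rdma-core exposes ibv_reg_dmabuf_mr() for the
 * UVERBS_METHOD_REG_DMABUF_MR method added above. Obtaining dmabuf_fd from
 * the exporting device is outside the scope of this sketch.
 */
#include <infiniband/verbs.h>
#include <stdint.h>
#include <stdio.h>

static struct ibv_mr *register_dmabuf_region(struct ibv_pd *pd, int dmabuf_fd,
					     size_t length, uint64_t iova)
{
	struct ibv_mr *mr;

	/* offset 0 into the dma-buf; access flags mirror a normal ibv_reg_mr() */
	mr = ibv_reg_dmabuf_mr(pd, 0, length, iova, dmabuf_fd,
			       IBV_ACCESS_LOCAL_WRITE |
			       IBV_ACCESS_REMOTE_READ |
			       IBV_ACCESS_REMOTE_WRITE);
	if (!mr)
		perror("ibv_reg_dmabuf_mr");
	return mr;
}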