diff options
Diffstat (limited to 'include/rdma/ib_verbs.h')
-rw-r--r-- | include/rdma/ib_verbs.h | 257 |
1 files changed, 163 insertions, 94 deletions
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6c003995347a..e950c2a68f06 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -94,6 +94,7 @@ enum ib_gid_type { struct ib_gid_attr { struct net_device *ndev; struct ib_device *device; + union ib_gid gid; enum ib_gid_type gid_type; u16 index; u8 port_num; @@ -148,13 +149,13 @@ static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type net return IB_GID_TYPE_IB; } -static inline enum rdma_network_type ib_gid_to_network_type(enum ib_gid_type gid_type, - union ib_gid *gid) +static inline enum rdma_network_type +rdma_gid_attr_network_type(const struct ib_gid_attr *attr) { - if (gid_type == IB_GID_TYPE_IB) + if (attr->gid_type == IB_GID_TYPE_IB) return RDMA_NETWORK_IB; - if (ipv6_addr_v4mapped((struct in6_addr *)gid)) + if (ipv6_addr_v4mapped((struct in6_addr *)&attr->gid)) return RDMA_NETWORK_IPV4; else return RDMA_NETWORK_IPV6; @@ -344,7 +345,8 @@ struct ib_device_attr { int max_qp; int max_qp_wr; u64 device_cap_flags; - int max_sge; + int max_send_sge; + int max_recv_sge; int max_sge_rd; int max_cq; int max_cqe; @@ -430,33 +432,6 @@ enum ib_port_state { IB_PORT_ACTIVE_DEFER = 5 }; -enum ib_port_cap_flags { - IB_PORT_SM = 1 << 1, - IB_PORT_NOTICE_SUP = 1 << 2, - IB_PORT_TRAP_SUP = 1 << 3, - IB_PORT_OPT_IPD_SUP = 1 << 4, - IB_PORT_AUTO_MIGR_SUP = 1 << 5, - IB_PORT_SL_MAP_SUP = 1 << 6, - IB_PORT_MKEY_NVRAM = 1 << 7, - IB_PORT_PKEY_NVRAM = 1 << 8, - IB_PORT_LED_INFO_SUP = 1 << 9, - IB_PORT_SM_DISABLED = 1 << 10, - IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11, - IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12, - IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14, - IB_PORT_CM_SUP = 1 << 16, - IB_PORT_SNMP_TUNNEL_SUP = 1 << 17, - IB_PORT_REINIT_SUP = 1 << 18, - IB_PORT_DEVICE_MGMT_SUP = 1 << 19, - IB_PORT_VENDOR_CLASS_SUP = 1 << 20, - IB_PORT_DR_NOTICE_SUP = 1 << 21, - IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22, - IB_PORT_BOOT_MGMT_SUP = 1 << 23, - IB_PORT_LINK_LATENCY_SUP = 1 << 24, - IB_PORT_CLIENT_REG_SUP = 1 << 25, - IB_PORT_IP_BASED_GIDS = 1 << 26, -}; - enum ib_port_width { IB_WIDTH_1X = 1, IB_WIDTH_4X = 2, @@ -554,6 +529,7 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct( #define RDMA_CORE_CAP_AF_IB 0x00001000 #define RDMA_CORE_CAP_ETH_AH 0x00002000 #define RDMA_CORE_CAP_OPA_AH 0x00004000 +#define RDMA_CORE_CAP_IB_GRH_REQUIRED 0x00008000 /* Protocol 0xFFF00000 */ #define RDMA_CORE_CAP_PROT_IB 0x00100000 @@ -563,6 +539,10 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct( #define RDMA_CORE_CAP_PROT_RAW_PACKET 0x01000000 #define RDMA_CORE_CAP_PROT_USNIC 0x02000000 +#define RDMA_CORE_PORT_IB_GRH_REQUIRED (RDMA_CORE_CAP_IB_GRH_REQUIRED \ + | RDMA_CORE_CAP_PROT_ROCE \ + | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP) + #define RDMA_CORE_PORT_IBA_IB (RDMA_CORE_CAP_PROT_IB \ | RDMA_CORE_CAP_IB_MAD \ | RDMA_CORE_CAP_IB_SMI \ @@ -595,6 +575,8 @@ struct ib_port_attr { enum ib_mtu max_mtu; enum ib_mtu active_mtu; int gid_tbl_len; + unsigned int ip_gids:1; + /* This is the value from PortInfo CapabilityMask, defined by IBA */ u32 port_cap_flags; u32 max_msg_sz; u32 bad_pkey_cntr; @@ -610,7 +592,6 @@ struct ib_port_attr { u8 active_width; u8 active_speed; u8 phys_state; - bool grh_required; }; enum ib_device_modify_flags { @@ -689,6 +670,7 @@ struct ib_event_handler { } while (0) struct ib_global_route { + const struct ib_gid_attr *sgid_attr; union ib_gid dgid; u32 flow_label; u8 sgid_index; @@ -1370,7 +1352,7 @@ struct ib_rdma_wr { u32 rkey; }; -static inline struct ib_rdma_wr *rdma_wr(struct ib_send_wr *wr) +static inline const struct ib_rdma_wr *rdma_wr(const struct ib_send_wr *wr) { return container_of(wr, struct ib_rdma_wr, wr); } @@ -1385,7 +1367,7 @@ struct ib_atomic_wr { u32 rkey; }; -static inline struct ib_atomic_wr *atomic_wr(struct ib_send_wr *wr) +static inline const struct ib_atomic_wr *atomic_wr(const struct ib_send_wr *wr) { return container_of(wr, struct ib_atomic_wr, wr); } @@ -1402,7 +1384,7 @@ struct ib_ud_wr { u8 port_num; /* valid for DR SMPs on switch only */ }; -static inline struct ib_ud_wr *ud_wr(struct ib_send_wr *wr) +static inline const struct ib_ud_wr *ud_wr(const struct ib_send_wr *wr) { return container_of(wr, struct ib_ud_wr, wr); } @@ -1414,7 +1396,7 @@ struct ib_reg_wr { int access; }; -static inline struct ib_reg_wr *reg_wr(struct ib_send_wr *wr) +static inline const struct ib_reg_wr *reg_wr(const struct ib_send_wr *wr) { return container_of(wr, struct ib_reg_wr, wr); } @@ -1427,7 +1409,8 @@ struct ib_sig_handover_wr { struct ib_sge *prot; }; -static inline struct ib_sig_handover_wr *sig_handover_wr(struct ib_send_wr *wr) +static inline const struct ib_sig_handover_wr * +sig_handover_wr(const struct ib_send_wr *wr) { return container_of(wr, struct ib_sig_handover_wr, wr); } @@ -1443,14 +1426,16 @@ struct ib_recv_wr { }; enum ib_access_flags { - IB_ACCESS_LOCAL_WRITE = 1, - IB_ACCESS_REMOTE_WRITE = (1<<1), - IB_ACCESS_REMOTE_READ = (1<<2), - IB_ACCESS_REMOTE_ATOMIC = (1<<3), - IB_ACCESS_MW_BIND = (1<<4), - IB_ZERO_BASED = (1<<5), - IB_ACCESS_ON_DEMAND = (1<<6), - IB_ACCESS_HUGETLB = (1<<7), + IB_ACCESS_LOCAL_WRITE = IB_UVERBS_ACCESS_LOCAL_WRITE, + IB_ACCESS_REMOTE_WRITE = IB_UVERBS_ACCESS_REMOTE_WRITE, + IB_ACCESS_REMOTE_READ = IB_UVERBS_ACCESS_REMOTE_READ, + IB_ACCESS_REMOTE_ATOMIC = IB_UVERBS_ACCESS_REMOTE_ATOMIC, + IB_ACCESS_MW_BIND = IB_UVERBS_ACCESS_MW_BIND, + IB_ZERO_BASED = IB_UVERBS_ACCESS_ZERO_BASED, + IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND, + IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB, + + IB_ACCESS_SUPPORTED = ((IB_ACCESS_HUGETLB << 1) - 1) }; /* @@ -1473,14 +1458,17 @@ struct ib_fmr_attr { struct ib_umem; enum rdma_remove_reason { - /* Userspace requested uobject deletion. Call could fail */ + /* + * Userspace requested uobject deletion or initial try + * to remove uobject via cleanup. Call could fail + */ RDMA_REMOVE_DESTROY, /* Context deletion. This call should delete the actual object itself */ RDMA_REMOVE_CLOSE, /* Driver is being hot-unplugged. This call should delete the actual object itself */ RDMA_REMOVE_DRIVER_REMOVE, - /* Context is being cleaned-up, but commit was just completed */ - RDMA_REMOVE_DURING_CLEANUP, + /* uobj is being cleaned-up before being committed */ + RDMA_REMOVE_ABORT, }; struct ib_rdmacg_object { @@ -1492,14 +1480,14 @@ struct ib_rdmacg_object { struct ib_ucontext { struct ib_device *device; struct ib_uverbs_file *ufile; + /* + * 'closing' can be read by the driver only during a destroy callback, + * it is set when we are closing the file descriptor and indicates + * that mm_sem may be locked. + */ int closing; - /* locking the uobjects_list */ - struct mutex uobjects_lock; - struct list_head uobjects; - /* protects cleanup process from other actions */ - struct rw_semaphore cleanup_rwsem; - enum rdma_remove_reason cleanup_reason; + bool cleanup_retryable; struct pid *tgid; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING @@ -1524,6 +1512,9 @@ struct ib_ucontext { struct ib_uobject { u64 user_handle; /* handle given to us by userspace */ + /* ufile & ucontext owning this object */ + struct ib_uverbs_file *ufile; + /* FIXME, save memory: ufile->context == context */ struct ib_ucontext *context; /* associated user context */ void *object; /* containing object */ struct list_head list; /* link to context's list */ @@ -1533,13 +1524,7 @@ struct ib_uobject { atomic_t usecnt; /* protects exclusive access */ struct rcu_head rcu; /* kfree_rcu() overhead */ - const struct uverbs_obj_type *type; -}; - -struct ib_uobject_file { - struct ib_uobject uobj; - /* ufile contains the lock between context release and file close */ - struct ib_uverbs_file *ufile; + const struct uverbs_api_object *uapi_object; }; struct ib_udata { @@ -1578,6 +1563,7 @@ struct ib_ah { struct ib_device *device; struct ib_pd *pd; struct ib_uobject *uobject; + const struct ib_gid_attr *sgid_attr; enum rdma_ah_attr_type type; }; @@ -1776,6 +1762,9 @@ struct ib_qp { struct ib_uobject *uobject; void (*event_handler)(struct ib_event *, void *); void *qp_context; + /* sgid_attrs associated with the AV's */ + const struct ib_gid_attr *av_sgid_attr; + const struct ib_gid_attr *alt_path_sgid_attr; u32 qp_num; u32 max_write_sge; u32 max_read_sge; @@ -2098,6 +2087,7 @@ struct ib_flow_attr { struct ib_flow { struct ib_qp *qp; + struct ib_device *device; struct ib_uobject *uobject; }; @@ -2213,7 +2203,11 @@ struct rdma_netdev { struct ib_device *hca; u8 port_num; - /* cleanup function must be specified */ + /* + * cleanup function must be specified. + * FIXME: This is only used for OPA_VNIC and that usage should be + * removed too. + */ void (*free_rdma_netdev)(struct net_device *netdev); /* control functions */ @@ -2242,11 +2236,6 @@ struct ib_counters { atomic_t usecnt; }; -enum ib_read_counters_flags { - /* prefer read values from driver cache */ - IB_READ_COUNTERS_ATTR_PREFER_CACHED = 1 << 0, -}; - struct ib_counters_read_attr { u64 *counters_buff; u32 ncounters; @@ -2341,8 +2330,7 @@ struct ib_device { * concurrently for different ports. This function is only called when * roce_gid_table is used. */ - int (*add_gid)(const union ib_gid *gid, - const struct ib_gid_attr *attr, + int (*add_gid)(const struct ib_gid_attr *attr, void **context); /* When calling del_gid, the HW vendor's driver should delete the * gid of device @device at gid index gid_index of port port_num @@ -2389,8 +2377,8 @@ struct ib_device { struct ib_srq_attr *srq_attr); int (*destroy_srq)(struct ib_srq *srq); int (*post_srq_recv)(struct ib_srq *srq, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr); + const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr); struct ib_qp * (*create_qp)(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr, struct ib_udata *udata); @@ -2404,11 +2392,11 @@ struct ib_device { struct ib_qp_init_attr *qp_init_attr); int (*destroy_qp)(struct ib_qp *qp); int (*post_send)(struct ib_qp *qp, - struct ib_send_wr *send_wr, - struct ib_send_wr **bad_send_wr); + const struct ib_send_wr *send_wr, + const struct ib_send_wr **bad_send_wr); int (*post_recv)(struct ib_qp *qp, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr); + const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr); struct ib_cq * (*create_cq)(struct ib_device *device, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, @@ -2592,7 +2580,7 @@ struct ib_device { const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev, int comp_vector); - struct uverbs_root_spec *specs_root; + const struct uverbs_object_tree_def *const *driver_specs; enum rdma_driver_id driver_id; }; @@ -2679,6 +2667,46 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata, } /** + * ib_is_destroy_retryable - Check whether the uobject destruction + * is retryable. + * @ret: The initial destruction return code + * @why: remove reason + * @uobj: The uobject that is destroyed + * + * This function is a helper function that IB layer and low-level drivers + * can use to consider whether the destruction of the given uobject is + * retry-able. + * It checks the original return code, if it wasn't success the destruction + * is retryable according to the ucontext state (i.e. cleanup_retryable) and + * the remove reason. (i.e. why). + * Must be called with the object locked for destroy. + */ +static inline bool ib_is_destroy_retryable(int ret, enum rdma_remove_reason why, + struct ib_uobject *uobj) +{ + return ret && (why == RDMA_REMOVE_DESTROY || + uobj->context->cleanup_retryable); +} + +/** + * ib_destroy_usecnt - Called during destruction to check the usecnt + * @usecnt: The usecnt atomic + * @why: remove reason + * @uobj: The uobject that is destroyed + * + * Non-zero usecnts will block destruction unless destruction was triggered by + * a ucontext cleanup. + */ +static inline int ib_destroy_usecnt(atomic_t *usecnt, + enum rdma_remove_reason why, + struct ib_uobject *uobj) +{ + if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj)) + return -EBUSY; + return 0; +} + +/** * ib_modify_qp_is_ok - Check that the supplied attribute mask * contains all required attributes and no attributes not allowed for * the given QP state transition. @@ -2755,6 +2783,13 @@ static inline int rdma_is_port_valid(const struct ib_device *device, port <= rdma_end_port(device)); } +static inline bool rdma_is_grh_required(const struct ib_device *device, + u8 port_num) +{ + return device->port_immutable[port_num].core_cap_flags & + RDMA_CORE_PORT_IB_GRH_REQUIRED; +} + static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num) { return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IB; @@ -3046,10 +3081,6 @@ static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num) return rdma_protocol_iwarp(dev, port_num); } -int ib_query_gid(struct ib_device *device, - u8 port_num, int index, union ib_gid *gid, - struct ib_gid_attr *attr); - int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, int state); int ib_get_vf_config(struct ib_device *device, int vf, u8 port, @@ -3148,6 +3179,13 @@ int ib_get_rdma_header_version(const union rdma_network_hdr *hdr); * ignored unless the work completion indicates that the GRH is valid. * @ah_attr: Returned attributes that can be used when creating an address * handle for replying to the message. + * When ib_init_ah_attr_from_wc() returns success, + * (a) for IB link layer it optionally contains a reference to SGID attribute + * when GRH is present for IB link layer. + * (b) for RoCE link layer it contains a reference to SGID attribute. + * User must invoke rdma_cleanup_ah_attr_gid_attr() to release reference to SGID + * attributes which are initialized using ib_init_ah_attr_from_wc(). + * */ int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num, const struct ib_wc *wc, const struct ib_grh *grh, @@ -3247,10 +3285,12 @@ int ib_destroy_srq(struct ib_srq *srq); * the work request that failed to be posted on the QP. */ static inline int ib_post_srq_recv(struct ib_srq *srq, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr) + const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr) { - return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr); + const struct ib_recv_wr *dummy; + + return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr ? : &dummy); } /** @@ -3348,10 +3388,12 @@ int ib_close_qp(struct ib_qp *qp); * earlier work requests in the list. */ static inline int ib_post_send(struct ib_qp *qp, - struct ib_send_wr *send_wr, - struct ib_send_wr **bad_send_wr) + const struct ib_send_wr *send_wr, + const struct ib_send_wr **bad_send_wr) { - return qp->device->post_send(qp, send_wr, bad_send_wr); + const struct ib_send_wr *dummy; + + return qp->device->post_send(qp, send_wr, bad_send_wr ? : &dummy); } /** @@ -3363,10 +3405,12 @@ static inline int ib_post_send(struct ib_qp *qp, * the work request that failed to be posted on the QP. */ static inline int ib_post_recv(struct ib_qp *qp, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr) + const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr) { - return qp->device->post_recv(qp, recv_wr, bad_recv_wr); + const struct ib_recv_wr *dummy; + + return qp->device->post_recv(qp, recv_wr, bad_recv_wr ? : &dummy); } struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, @@ -3801,10 +3845,6 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller); */ int ib_dealloc_xrcd(struct ib_xrcd *xrcd); -struct ib_flow *ib_create_flow(struct ib_qp *qp, - struct ib_flow_attr *flow_attr, int domain); -int ib_destroy_flow(struct ib_flow *flow_id); - static inline int ib_check_mr_access(int flags) { /* @@ -4033,8 +4073,19 @@ static inline void rdma_ah_set_grh(struct rdma_ah_attr *attr, grh->sgid_index = sgid_index; grh->hop_limit = hop_limit; grh->traffic_class = traffic_class; + grh->sgid_attr = NULL; } +void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr); +void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid, + u32 flow_label, u8 hop_limit, u8 traffic_class, + const struct ib_gid_attr *sgid_attr); +void rdma_copy_ah_attr(struct rdma_ah_attr *dest, + const struct rdma_ah_attr *src); +void rdma_replace_ah_attr(struct rdma_ah_attr *old, + const struct rdma_ah_attr *new); +void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src); + /** * rdma_ah_find_type - Return address handle type. * @@ -4102,6 +4153,20 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector) } +static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow, + struct ib_qp *qp, struct ib_device *device) +{ + uobj->object = ibflow; + ibflow->uobject = uobj; + + if (qp) { + atomic_inc(&qp->usecnt); + ibflow->qp = qp; + } + + ibflow->device = device; +} + /** * rdma_roce_rescan_device - Rescan all of the network devices in the system * and add their gids, as needed, to the relevant RoCE devices. @@ -4110,4 +4175,8 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector) */ void rdma_roce_rescan_device(struct ib_device *ibdev); +struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile); + +int uverbs_destroy_def_handler(struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs); #endif /* IB_VERBS_H */ |