From 09b93088d75009807b72293f26e2634430ce5ba9 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 11 May 2014 15:15:11 +0300 Subject: IB: Add a QP creation flag to use GFP_NOIO allocations This addresses a problem where NFS client writes over IPoIB connected mode may deadlock on memory allocation/writeback. The problem is not directly memory reclamation. There is an indirect dependency between network filesystems writing back pages and ipoib_cm_tx_init() due to how a kworker is used. Page reclaim cannot make forward progress until ipoib_cm_tx_init() succeeds and it is stuck in page reclaim itself waiting for network transmission. Ordinarily this situation may be avoided by having the caller use GFP_NOFS but ipoib_cm_tx_init() does not have that information. To address this, take a general approach and add a new QP creation flag that tells the low-level hardware driver to use GFP_NOIO for the memory allocations related to the new QP. Use the new flag in the ipoib connected mode path, and if the driver doesn't support it, re-issue the QP creation without the flag. Signed-off-by: Mel Gorman Signed-off-by: Jiri Kosina Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- include/rdma/ib_verbs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index acd825182977..d75b02f9c80d 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -783,6 +783,7 @@ enum ib_qp_create_flags { IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, IB_QP_CREATE_NETIF_QP = 1 << 5, IB_QP_CREATE_SIGNATURE_EN = 1 << 6, + IB_QP_CREATE_USE_GFP_NOIO = 1 << 7, /* reserve bits 26-31 for low level drivers' internal use */ IB_QP_CREATE_RESERVED_START = 1 << 26, IB_QP_CREATE_RESERVED_END = 1 << 31, -- cgit v1.2.3 From 8385fd841468868e0b37a722530d75b0e8bfc5a8 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 4 Jun 2014 10:00:16 -0700 Subject: IB/core: Fix sparse warnings about redeclared functions Fix a few functions that are declared with __attribute_const__ in the ib_verbs.h header file but defined without it in verbs.c. This gets rid of the following sparse warnings: drivers/infiniband/core/verbs.c:51:5: error: symbol 'ib_rate_to_mult' redeclared with different type (originally declared at include/rdma/ib_verbs.h:469) - different modifiers drivers/infiniband/core/verbs.c:68:14: error: symbol 'mult_to_ib_rate' redeclared with different type (originally declared at include/rdma/ib_verbs.h:607) - different modifiers drivers/infiniband/core/verbs.c:85:5: error: symbol 'ib_rate_to_mbps' redeclared with different type (originally declared at include/rdma/ib_verbs.h:476) - different modifiers drivers/infiniband/core/verbs.c:111:1: error: symbol 'rdma_node_get_transport' redeclared with different type (originally declared at include/rdma/ib_verbs.h:84) - different modifiers Signed-off-by: Roland Dreier --- include/rdma/ib_verbs.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index acd825182977..e9da1bc89d25 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -80,8 +80,8 @@ enum rdma_transport_type { RDMA_TRANSPORT_USNIC_UDP }; -enum rdma_transport_type -rdma_node_get_transport(enum rdma_node_type node_type) __attribute_const__; +__attribute_const__ enum rdma_transport_type +rdma_node_get_transport(enum rdma_node_type node_type); enum rdma_link_layer { IB_LINK_LAYER_UNSPECIFIED, @@ -466,14 +466,14 @@ enum ib_rate { * converted to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec. * @rate: rate to convert. */ -int ib_rate_to_mult(enum ib_rate rate) __attribute_const__; +__attribute_const__ int ib_rate_to_mult(enum ib_rate rate); /** * ib_rate_to_mbps - Convert the IB rate enum to Mbps. * For example, IB_RATE_2_5_GBPS will be converted to 2500. * @rate: rate to convert. */ -int ib_rate_to_mbps(enum ib_rate rate) __attribute_const__; +__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate); enum ib_mr_create_flags { IB_MR_SIGNATURE_EN = 1, @@ -604,7 +604,7 @@ struct ib_mr_status { * enum. * @mult: multiple to convert. */ -enum ib_rate mult_to_ib_rate(int mult) __attribute_const__; +__attribute_const__ enum ib_rate mult_to_ib_rate(int mult); struct ib_ah_attr { struct ib_global_route grh; -- cgit v1.2.3 From 30dc5e63d6a5ad24894b5512d10b228d73645a44 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Wed, 26 Mar 2014 17:07:35 -0500 Subject: RDMA/core: Add support for iWARP Port Mapper user space service This patch adds iWARP Port Mapper (IWPM) Version 2 support. The iWARP Port Mapper implementation is based on the port mapper specification section in the Sockets Direct Protocol paper - http://www.rdmaconsortium.org/home/draft-pinkerton-iwarp-sdp-v1.0.pdf Existing iWARP RDMA providers use the same IP address as the native TCP/IP stack when creating RDMA connections. They need a mechanism to claim the TCP ports used for RDMA connections to prevent TCP port collisions when other host applications use TCP ports. The iWARP Port Mapper provides a standard mechanism to accomplish this. Without this service it is possible for RDMA application to bind/listen on the same port which is already being used by native TCP host application. If that happens the incoming TCP connection data can be passed to the RDMA stack with error. The iWARP Port Mapper solution doesn't contain any changes to the existing network stack in the kernel space. All the changes are contained with the infiniband tree and also in user space. The iWARP Port Mapper service is implemented as a user space daemon process. Source for the IWPM service is located at http://git.openfabrics.org/git?p=~tnikolova/libiwpm-1.0.0/.git;a=summary The iWARP driver (port mapper client) sends to the IWPM service the local IP address and TCP port it has received from the RDMA application, when starting a connection. The IWPM service performs a socket bind from user space to get an available TCP port, called a mapped port, and communicates it back to the client. In that sense, the IWPM service is used to map the TCP port, which the RDMA application uses to any port available from the host TCP port space. The mapped ports are used in iWARP RDMA connections to avoid collisions with native TCP stack which is aware that these ports are taken. When an RDMA connection using a mapped port is terminated, the client notifies the IWPM service, which then releases the TCP port. The message exchange between the IWPM service and the iWARP drivers (between user space and kernel space) is implemented using netlink sockets. 1) Netlink interface functions are added: ibnl_unicast() and ibnl_mulitcast() for sending netlink messages to user space 2) The signature of the existing ibnl_put_msg() is changed to be more generic 3) Two netlink clients are added: RDMA_NL_NES, RDMA_NL_C4IW corresponding to the two iWarp drivers - nes and cxgb4 which use the IWPM service 4) Enums are added to enumerate the attributes in the netlink messages, which are exchanged between the user space IWPM service and the iWARP drivers Signed-off-by: Tatyana Nikolova Signed-off-by: Steve Wise Reviewed-by: PJ Waskiewicz [ Fold in range checking fixes and nlh_next removal as suggested by Dan Carpenter and Steve Wise. Fix sparse endianness in hash. - Roland ] Signed-off-by: Roland Dreier --- include/rdma/iw_portmap.h | 199 ++++++++++++++++++++++++++++++++++++++++++++ include/rdma/rdma_netlink.h | 23 ++++- 2 files changed, 221 insertions(+), 1 deletion(-) create mode 100644 include/rdma/iw_portmap.h (limited to 'include/rdma') diff --git a/include/rdma/iw_portmap.h b/include/rdma/iw_portmap.h new file mode 100644 index 000000000000..928b2775e992 --- /dev/null +++ b/include/rdma/iw_portmap.h @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2014 Intel Corporation. All rights reserved. + * Copyright (c) 2014 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _IW_PORTMAP_H +#define _IW_PORTMAP_H + +#define IWPM_ULIBNAME_SIZE 32 +#define IWPM_DEVNAME_SIZE 32 +#define IWPM_IFNAME_SIZE 16 +#define IWPM_IPADDR_SIZE 16 + +enum { + IWPM_INVALID_NLMSG_ERR = 10, + IWPM_CREATE_MAPPING_ERR, + IWPM_DUPLICATE_MAPPING_ERR, + IWPM_UNKNOWN_MAPPING_ERR, + IWPM_CLIENT_DEV_INFO_ERR, + IWPM_USER_LIB_INFO_ERR, + IWPM_REMOTE_QUERY_REJECT +}; + +struct iwpm_dev_data { + char dev_name[IWPM_DEVNAME_SIZE]; + char if_name[IWPM_IFNAME_SIZE]; +}; + +struct iwpm_sa_data { + struct sockaddr_storage loc_addr; + struct sockaddr_storage mapped_loc_addr; + struct sockaddr_storage rem_addr; + struct sockaddr_storage mapped_rem_addr; +}; + +/** + * iwpm_init - Allocate resources for the iwarp port mapper + * + * Should be called when network interface goes up. + */ +int iwpm_init(u8); + +/** + * iwpm_exit - Deallocate resources for the iwarp port mapper + * + * Should be called when network interface goes down. + */ +int iwpm_exit(u8); + +/** + * iwpm_valid_pid - Check if the userspace iwarp port mapper pid is valid + * + * Returns true if the pid is greater than zero, otherwise returns false + */ +int iwpm_valid_pid(void); + +/** + * iwpm_register_pid - Send a netlink query to userspace + * to get the iwarp port mapper pid + * @pm_msg: Contains driver info to send to the userspace port mapper + * @nl_client: The index of the netlink client + */ +int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client); + +/** + * iwpm_add_mapping - Send a netlink add mapping request to + * the userspace port mapper + * @pm_msg: Contains the local ip/tcp address info to send + * @nl_client: The index of the netlink client + * + * If the request is successful, the pm_msg stores + * the port mapper response (mapped address info) + */ +int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client); + +/** + * iwpm_add_and_query_mapping - Send a netlink add and query mapping request + * to the userspace port mapper + * @pm_msg: Contains the local and remote ip/tcp address info to send + * @nl_client: The index of the netlink client + * + * If the request is successful, the pm_msg stores the + * port mapper response (mapped local and remote address info) + */ +int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client); + +/** + * iwpm_remove_mapping - Send a netlink remove mapping request + * to the userspace port mapper + * + * @local_addr: Local ip/tcp address to remove + * @nl_client: The index of the netlink client + */ +int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client); + +/** + * iwpm_register_pid_cb - Process the port mapper response to + * iwpm_register_pid query + * @skb: + * @cb: Contains the received message (payload and netlink header) + * + * If successful, the function receives the userspace port mapper pid + * which is used in future communication with the port mapper + */ +int iwpm_register_pid_cb(struct sk_buff *, struct netlink_callback *); + +/** + * iwpm_add_mapping_cb - Process the port mapper response to + * iwpm_add_mapping request + * @skb: + * @cb: Contains the received message (payload and netlink header) + */ +int iwpm_add_mapping_cb(struct sk_buff *, struct netlink_callback *); + +/** + * iwpm_add_and_query_mapping_cb - Process the port mapper response to + * iwpm_add_and_query_mapping request + * @skb: + * @cb: Contains the received message (payload and netlink header) + */ +int iwpm_add_and_query_mapping_cb(struct sk_buff *, struct netlink_callback *); + +/** + * iwpm_mapping_error_cb - Process port mapper notification for error + * + * @skb: + * @cb: Contains the received message (payload and netlink header) + */ +int iwpm_mapping_error_cb(struct sk_buff *, struct netlink_callback *); + +/** + * iwpm_mapping_info_cb - Process a notification that the userspace + * port mapper daemon is started + * @skb: + * @cb: Contains the received message (payload and netlink header) + * + * Using the received port mapper pid, send all the local mapping + * info records to the userspace port mapper + */ +int iwpm_mapping_info_cb(struct sk_buff *, struct netlink_callback *); + +/** + * iwpm_ack_mapping_info_cb - Process the port mapper ack for + * the provided local mapping info records + * @skb: + * @cb: Contains the received message (payload and netlink header) + */ +int iwpm_ack_mapping_info_cb(struct sk_buff *, struct netlink_callback *); + +/** + * iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address + * info in a hash table + * @local_addr: Local ip/tcp address + * @mapped_addr: Mapped local ip/tcp address + * @nl_client: The index of the netlink client + */ +int iwpm_create_mapinfo(struct sockaddr_storage *local_addr, + struct sockaddr_storage *mapped_addr, u8 nl_client); + +/** + * iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address + * info from the hash table + * @local_addr: Local ip/tcp address + * @mapped_addr: Mapped local ip/tcp address + * + * Returns err code if mapping info is not found in the hash table, + * otherwise returns 0 + */ +int iwpm_remove_mapinfo(struct sockaddr_storage *local_addr, + struct sockaddr_storage *mapped_addr); + +#endif /* _IW_PORTMAP_H */ diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index e38de79eeb48..0790882e0c9b 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -43,7 +43,7 @@ int ibnl_remove_client(int index); * Returns the allocated buffer on success and NULL on failure. */ void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq, - int len, int client, int op); + int len, int client, int op, int flags); /** * Put a new attribute in a supplied skb. * @skb: The netlink skb. @@ -56,4 +56,25 @@ void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq, int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh, int len, void *data, int type); +/** + * Send the supplied skb to a specific userspace PID. + * @skb: The netlink skb + * @nlh: Header of the netlink message to send + * @pid: Userspace netlink process ID + * Returns 0 on success or a negative error code. + */ +int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh, + __u32 pid); + +/** + * Send the supplied skb to a netlink group. + * @skb: The netlink skb + * @nlh: Header of the netlink message to send + * @group: Netlink group ID + * @flags: allocation flags + * Returns 0 on success or a negative error code. + */ +int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh, + unsigned int group, gfp_t flags); + #endif /* _RDMA_NETLINK_H */ -- cgit v1.2.3