summaryrefslogtreecommitdiffstats
path: root/net/smc/smc_clc.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-04-03 14:04:18 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-04-03 14:04:18 -0700
commit5bb053bef82523a8fd78d650bca81c9f114fa276 (patch)
tree58c2fe47f60bb69230bb05d57a6c9e3f47f7b1fe /net/smc/smc_clc.c
parentbb2407a7219760926760f0448fddf00d625e5aec (diff)
parent159f02977b2feb18a4bece5e586c838a6d26d44b (diff)
downloadlinux-5bb053bef82523a8fd78d650bca81c9f114fa276.tar.gz
linux-5bb053bef82523a8fd78d650bca81c9f114fa276.tar.bz2
linux-5bb053bef82523a8fd78d650bca81c9f114fa276.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) Support offloading wireless authentication to userspace via NL80211_CMD_EXTERNAL_AUTH, from Srinivas Dasari. 2) A lot of work on network namespace setup/teardown from Kirill Tkhai. Setup and cleanup of namespaces now all run asynchronously and thus performance is significantly increased. 3) Add rx/tx timestamping support to mv88e6xxx driver, from Brandon Streiff. 4) Support zerocopy on RDS sockets, from Sowmini Varadhan. 5) Use denser instruction encoding in x86 eBPF JIT, from Daniel Borkmann. 6) Support hw offload of vlan filtering in mvpp2 dreiver, from Maxime Chevallier. 7) Support grafting of child qdiscs in mlxsw driver, from Nogah Frankel. 8) Add packet forwarding tests to selftests, from Ido Schimmel. 9) Deal with sub-optimal GSO packets better in BBR congestion control, from Eric Dumazet. 10) Support 5-tuple hashing in ipv6 multipath routing, from David Ahern. 11) Add path MTU tests to selftests, from Stefano Brivio. 12) Various bits of IPSEC offloading support for mlx5, from Aviad Yehezkel, Yossi Kuperman, and Saeed Mahameed. 13) Support RSS spreading on ntuple filters in SFC driver, from Edward Cree. 14) Lots of sockmap work from John Fastabend. Applications can use eBPF to filter sendmsg and sendpage operations. 15) In-kernel receive TLS support, from Dave Watson. 16) Add XDP support to ixgbevf, this is significant because it should allow optimized XDP usage in various cloud environments. From Tony Nguyen. 17) Add new Intel E800 series "ice" ethernet driver, from Anirudh Venkataramanan et al. 18) IP fragmentation match offload support in nfp driver, from Pieter Jansen van Vuuren. 19) Support XDP redirect in i40e driver, from Björn Töpel. 20) Add BPF_RAW_TRACEPOINT program type for accessing the arguments of tracepoints in their raw form, from Alexei Starovoitov. 21) Lots of striding RQ improvements to mlx5 driver with many performance improvements, from Tariq Toukan. 22) Use rhashtable for inet frag reassembly, from Eric Dumazet. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1678 commits) net: mvneta: improve suspend/resume net: mvneta: split rxq/txq init and txq deinit into SW and HW parts ipv6: frags: fix /proc/sys/net/ipv6/ip6frag_low_thresh net: bgmac: Fix endian access in bgmac_dma_tx_ring_free() net: bgmac: Correctly annotate register space route: check sysctl_fib_multipath_use_neigh earlier than hash fix typo in command value in drivers/net/phy/mdio-bitbang. sky2: Increase D3 delay to sky2 stops working after suspend net/mlx5e: Set EQE based as default TX interrupt moderation mode ibmvnic: Disable irqs before exiting reset from closed state net: sched: do not emit messages while holding spinlock vlan: also check phy_driver ts_info for vlan's real device Bluetooth: Mark expected switch fall-throughs Bluetooth: Set HCI_QUIRK_SIMULTANEOUS_DISCOVERY for BTUSB_QCA_ROME Bluetooth: btrsi: remove unused including <linux/version.h> Bluetooth: hci_bcm: Remove DMI quirk for the MINIX Z83-4 sh_eth: kill useless check in __sh_eth_get_regs() sh_eth: add sh_eth_cpu_data::no_xdfar flag ipv6: factorize sk_wmem_alloc updates done by __ip6_append_data() ipv4: factorize sk_wmem_alloc updates done by __ip_append_data() ...
Diffstat (limited to 'net/smc/smc_clc.c')
-rw-r--r--net/smc/smc_clc.c214
1 files changed, 196 insertions, 18 deletions
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 15c213250606..3a988c22f627 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -5,15 +5,17 @@
* CLC (connection layer control) handshake over initial TCP socket to
* prepare for RDMA traffic
*
- * Copyright IBM Corp. 2016
+ * Copyright IBM Corp. 2016, 2018
*
* Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
*/
#include <linux/in.h>
+#include <linux/inetdevice.h>
#include <linux/if_ether.h>
#include <linux/sched/signal.h>
+#include <net/addrconf.h>
#include <net/sock.h>
#include <net/tcp.h>
@@ -22,6 +24,9 @@
#include "smc_clc.h"
#include "smc_ib.h"
+/* eye catcher "SMCR" EBCDIC for CLC messages */
+static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
+
/* check if received message has a correct header length and contains valid
* heading and trailing eyecatchers
*/
@@ -70,6 +75,172 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
return true;
}
+/* find ipv4 addr on device and get the prefix len, fill CLC proposal msg */
+static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4,
+ struct smc_clc_msg_proposal_prefix *prop)
+{
+ struct in_device *in_dev = __in_dev_get_rcu(dst->dev);
+
+ if (!in_dev)
+ return -ENODEV;
+ for_ifa(in_dev) {
+ if (!inet_ifa_match(ipv4, ifa))
+ continue;
+ prop->prefix_len = inet_mask_len(ifa->ifa_mask);
+ prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask;
+ /* prop->ipv6_prefixes_cnt = 0; already done by memset before */
+ return 0;
+ } endfor_ifa(in_dev);
+ return -ENOENT;
+}
+
+/* fill CLC proposal msg with ipv6 prefixes from device */
+static int smc_clc_prfx_set6_rcu(struct dst_entry *dst,
+ struct smc_clc_msg_proposal_prefix *prop,
+ struct smc_clc_ipv6_prefix *ipv6_prfx)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_dev *in6_dev = __in6_dev_get(dst->dev);
+ struct inet6_ifaddr *ifa;
+ int cnt = 0;
+
+ if (!in6_dev)
+ return -ENODEV;
+ /* use a maximum of 8 IPv6 prefixes from device */
+ list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
+ if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
+ continue;
+ ipv6_addr_prefix(&ipv6_prfx[cnt].prefix,
+ &ifa->addr, ifa->prefix_len);
+ ipv6_prfx[cnt].prefix_len = ifa->prefix_len;
+ cnt++;
+ if (cnt == SMC_CLC_MAX_V6_PREFIX)
+ break;
+ }
+ prop->ipv6_prefixes_cnt = cnt;
+ if (cnt)
+ return 0;
+#endif
+ return -ENOENT;
+}
+
+/* retrieve and set prefixes in CLC proposal msg */
+static int smc_clc_prfx_set(struct socket *clcsock,
+ struct smc_clc_msg_proposal_prefix *prop,
+ struct smc_clc_ipv6_prefix *ipv6_prfx)
+{
+ struct dst_entry *dst = sk_dst_get(clcsock->sk);
+ struct sockaddr_storage addrs;
+ struct sockaddr_in6 *addr6;
+ struct sockaddr_in *addr;
+ int rc = -ENOENT;
+
+ memset(prop, 0, sizeof(*prop));
+ if (!dst) {
+ rc = -ENOTCONN;
+ goto out;
+ }
+ if (!dst->dev) {
+ rc = -ENODEV;
+ goto out_rel;
+ }
+ /* get address to which the internal TCP socket is bound */
+ kernel_getsockname(clcsock, (struct sockaddr *)&addrs);
+ /* analyze IP specific data of net_device belonging to TCP socket */
+ addr6 = (struct sockaddr_in6 *)&addrs;
+ rcu_read_lock();
+ if (addrs.ss_family == PF_INET) {
+ /* IPv4 */
+ addr = (struct sockaddr_in *)&addrs;
+ rc = smc_clc_prfx_set4_rcu(dst, addr->sin_addr.s_addr, prop);
+ } else if (ipv6_addr_v4mapped(&addr6->sin6_addr)) {
+ /* mapped IPv4 address - peer is IPv4 only */
+ rc = smc_clc_prfx_set4_rcu(dst, addr6->sin6_addr.s6_addr32[3],
+ prop);
+ } else {
+ /* IPv6 */
+ rc = smc_clc_prfx_set6_rcu(dst, prop, ipv6_prfx);
+ }
+ rcu_read_unlock();
+out_rel:
+ dst_release(dst);
+out:
+ return rc;
+}
+
+/* match ipv4 addrs of dev against addr in CLC proposal */
+static int smc_clc_prfx_match4_rcu(struct net_device *dev,
+ struct smc_clc_msg_proposal_prefix *prop)
+{
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
+
+ if (!in_dev)
+ return -ENODEV;
+ for_ifa(in_dev) {
+ if (prop->prefix_len == inet_mask_len(ifa->ifa_mask) &&
+ inet_ifa_match(prop->outgoing_subnet, ifa))
+ return 0;
+ } endfor_ifa(in_dev);
+
+ return -ENOENT;
+}
+
+/* match ipv6 addrs of dev against addrs in CLC proposal */
+static int smc_clc_prfx_match6_rcu(struct net_device *dev,
+ struct smc_clc_msg_proposal_prefix *prop)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_dev *in6_dev = __in6_dev_get(dev);
+ struct smc_clc_ipv6_prefix *ipv6_prfx;
+ struct inet6_ifaddr *ifa;
+ int i, max;
+
+ if (!in6_dev)
+ return -ENODEV;
+ /* ipv6 prefix list starts behind smc_clc_msg_proposal_prefix */
+ ipv6_prfx = (struct smc_clc_ipv6_prefix *)((u8 *)prop + sizeof(*prop));
+ max = min_t(u8, prop->ipv6_prefixes_cnt, SMC_CLC_MAX_V6_PREFIX);
+ list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
+ if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
+ continue;
+ for (i = 0; i < max; i++) {
+ if (ifa->prefix_len == ipv6_prfx[i].prefix_len &&
+ ipv6_prefix_equal(&ifa->addr, &ipv6_prfx[i].prefix,
+ ifa->prefix_len))
+ return 0;
+ }
+ }
+#endif
+ return -ENOENT;
+}
+
+/* check if proposed prefixes match one of our device prefixes */
+int smc_clc_prfx_match(struct socket *clcsock,
+ struct smc_clc_msg_proposal_prefix *prop)
+{
+ struct dst_entry *dst = sk_dst_get(clcsock->sk);
+ int rc;
+
+ if (!dst) {
+ rc = -ENOTCONN;
+ goto out;
+ }
+ if (!dst->dev) {
+ rc = -ENODEV;
+ goto out_rel;
+ }
+ rcu_read_lock();
+ if (!prop->ipv6_prefixes_cnt)
+ rc = smc_clc_prfx_match4_rcu(dst->dev, prop);
+ else
+ rc = smc_clc_prfx_match6_rcu(dst->dev, prop);
+ rcu_read_unlock();
+out_rel:
+ dst_release(dst);
+out:
+ return rc;
+}
+
/* Wait for data on the tcp-socket, analyze received data
* Returns:
* 0 if success and it was not a decline that we received.
@@ -189,16 +360,24 @@ int smc_clc_send_proposal(struct smc_sock *smc,
struct smc_ib_device *smcibdev,
u8 ibport)
{
+ struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX];
struct smc_clc_msg_proposal_prefix pclc_prfx;
struct smc_clc_msg_proposal pclc;
struct smc_clc_msg_trail trl;
+ int len, i, plen, rc;
int reason_code = 0;
- struct kvec vec[3];
+ struct kvec vec[4];
struct msghdr msg;
- int len, plen, rc;
+
+ /* retrieve ip prefixes for CLC proposal msg */
+ rc = smc_clc_prfx_set(smc->clcsock, &pclc_prfx, ipv6_prfx);
+ if (rc)
+ return SMC_CLC_DECL_CNFERR; /* configuration error */
/* send SMC Proposal CLC message */
- plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl);
+ plen = sizeof(pclc) + sizeof(pclc_prfx) +
+ (pclc_prfx.ipv6_prefixes_cnt * sizeof(ipv6_prfx[0])) +
+ sizeof(trl);
memset(&pclc, 0, sizeof(pclc));
memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
pclc.hdr.type = SMC_CLC_PROPOSAL;
@@ -209,23 +388,22 @@ int smc_clc_send_proposal(struct smc_sock *smc,
memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
pclc.iparea_offset = htons(0);
- memset(&pclc_prfx, 0, sizeof(pclc_prfx));
- /* determine subnet and mask from internal TCP socket */
- rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
- &pclc_prfx.prefix_len);
- if (rc)
- return SMC_CLC_DECL_CNFERR; /* configuration error */
- pclc_prfx.ipv6_prefixes_cnt = 0;
memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
memset(&msg, 0, sizeof(msg));
- vec[0].iov_base = &pclc;
- vec[0].iov_len = sizeof(pclc);
- vec[1].iov_base = &pclc_prfx;
- vec[1].iov_len = sizeof(pclc_prfx);
- vec[2].iov_base = &trl;
- vec[2].iov_len = sizeof(trl);
+ i = 0;
+ vec[i].iov_base = &pclc;
+ vec[i++].iov_len = sizeof(pclc);
+ vec[i].iov_base = &pclc_prfx;
+ vec[i++].iov_len = sizeof(pclc_prfx);
+ if (pclc_prfx.ipv6_prefixes_cnt > 0) {
+ vec[i].iov_base = &ipv6_prfx[0];
+ vec[i++].iov_len = pclc_prfx.ipv6_prefixes_cnt *
+ sizeof(ipv6_prfx[0]);
+ }
+ vec[i].iov_base = &trl;
+ vec[i++].iov_len = sizeof(trl);
/* due to the few bytes needed for clc-handshake this cannot block */
- len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen);
+ len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen);
if (len < sizeof(pclc)) {
if (len >= 0) {
reason_code = -ENETUNREACH;