summaryrefslogtreecommitdiffstats
path: root/net/rds/tcp.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-07-27 12:03:20 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2016-07-27 12:03:20 -0700
commit468fc7ed5537615efe671d94248446ac24679773 (patch)
tree27bc9de792e863d6ec1630927b77ac9e7dabb38a /net/rds/tcp.c
parent08fd8c17686c6b09fa410a26d516548dd80ff147 (diff)
parent36232012344b8db67052432742deaf17f82e70e6 (diff)
downloadlinux-stable-468fc7ed5537615efe671d94248446ac24679773.tar.gz
linux-stable-468fc7ed5537615efe671d94248446ac24679773.tar.bz2
linux-stable-468fc7ed5537615efe671d94248446ac24679773.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) Unified UDP encapsulation offload methods for drivers, from Alexander Duyck. 2) Make DSA binding more sane, from Andrew Lunn. 3) Support QCA9888 chips in ath10k, from Anilkumar Kolli. 4) Several workqueue usage cleanups, from Bhaktipriya Shridhar. 5) Add XDP (eXpress Data Path), essentially running BPF programs on RX packets as soon as the device sees them, with the option to mirror the packet on TX via the same interface. From Brenden Blanco and others. 6) Allow qdisc/class stats dumps to run lockless, from Eric Dumazet. 7) Add VLAN support to b53 and bcm_sf2, from Florian Fainelli. 8) Simplify netlink conntrack entry layout, from Florian Westphal. 9) Add ipv4 forwarding support to mlxsw spectrum driver, from Ido Schimmel, Yotam Gigi, and Jiri Pirko. 10) Add SKB array infrastructure and convert tun and macvtap over to it. From Michael S Tsirkin and Jason Wang. 11) Support qdisc packet injection in pktgen, from John Fastabend. 12) Add neighbour monitoring framework to TIPC, from Jon Paul Maloy. 13) Add NV congestion control support to TCP, from Lawrence Brakmo. 14) Add GSO support to SCTP, from Marcelo Ricardo Leitner. 15) Allow GRO and RPS to function on macsec devices, from Paolo Abeni. 16) Support MPLS over IPV4, from Simon Horman. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1622 commits) xgene: Fix build warning with ACPI disabled. be2net: perform temperature query in adapter regardless of its interface state l2tp: Correctly return -EBADF from pppol2tp_getname. net/mlx5_core/health: Remove deprecated create_singlethread_workqueue net: ipmr/ip6mr: update lastuse on entry change macsec: ensure rx_sa is set when validation is disabled tipc: dump monitor attributes tipc: add a function to get the bearer name tipc: get monitor threshold for the cluster tipc: make cluster size threshold for monitoring configurable tipc: introduce constants for tipc address validation net: neigh: disallow transition to NUD_STALE if lladdr is unchanged in neigh_update() MAINTAINERS: xgene: Add driver and documentation path Documentation: dtb: xgene: Add MDIO node dtb: xgene: Add MDIO node drivers: net: xgene: ethtool: Use phy_ethtool_gset and sset drivers: net: xgene: Use exported functions drivers: net: xgene: Enable MDIO driver drivers: net: xgene: Add backward compatibility drivers: net: phy: xgene: Add MDIO driver ...
Diffstat (limited to 'net/rds/tcp.c')
-rw-r--r--net/rds/tcp.c160
1 files changed, 94 insertions, 66 deletions
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index c8a7b4c90190..fcddacc92e01 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -56,8 +56,8 @@ static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp,
loff_t *fpos);
-int rds_tcp_min_sndbuf = SOCK_MIN_SNDBUF;
-int rds_tcp_min_rcvbuf = SOCK_MIN_RCVBUF;
+static int rds_tcp_min_sndbuf = SOCK_MIN_SNDBUF;
+static int rds_tcp_min_rcvbuf = SOCK_MIN_RCVBUF;
static struct ctl_table rds_tcp_sysctl_table[] = {
#define RDS_TCP_SNDBUF 0
@@ -135,9 +135,9 @@ void rds_tcp_restore_callbacks(struct socket *sock,
* from being called while it isn't set.
*/
void rds_tcp_reset_callbacks(struct socket *sock,
- struct rds_connection *conn)
+ struct rds_conn_path *cp)
{
- struct rds_tcp_connection *tc = conn->c_transport_data;
+ struct rds_tcp_connection *tc = cp->cp_transport_data;
struct socket *osock = tc->t_sock;
if (!osock)
@@ -147,8 +147,8 @@ void rds_tcp_reset_callbacks(struct socket *sock,
* We have an outstanding SYN to this peer, which may
* potentially have transitioned to the RDS_CONN_UP state,
* so we must quiesce any send threads before resetting
- * c_transport_data. We quiesce these threads by setting
- * c_state to something other than RDS_CONN_UP, and then
+ * cp_transport_data. We quiesce these threads by setting
+ * cp_state to something other than RDS_CONN_UP, and then
* waiting for any existing threads in rds_send_xmit to
* complete release_in_xmit(). (Subsequent threads entering
* rds_send_xmit() will bail on !rds_conn_up().
@@ -163,38 +163,25 @@ void rds_tcp_reset_callbacks(struct socket *sock,
* RDS_CONN_RESETTTING, to ensure that rds_tcp_state_change
* cannot mark rds_conn_path_up() in the window before lock_sock()
*/
- atomic_set(&conn->c_state, RDS_CONN_RESETTING);
- wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags));
+ atomic_set(&cp->cp_state, RDS_CONN_RESETTING);
+ wait_event(cp->cp_waitq, !test_bit(RDS_IN_XMIT, &cp->cp_flags));
lock_sock(osock->sk);
/* reset receive side state for rds_tcp_data_recv() for osock */
+ cancel_delayed_work_sync(&cp->cp_send_w);
+ cancel_delayed_work_sync(&cp->cp_recv_w);
if (tc->t_tinc) {
rds_inc_put(&tc->t_tinc->ti_inc);
tc->t_tinc = NULL;
}
tc->t_tinc_hdr_rem = sizeof(struct rds_header);
tc->t_tinc_data_rem = 0;
- tc->t_sock = NULL;
-
- write_lock_bh(&osock->sk->sk_callback_lock);
-
- osock->sk->sk_user_data = NULL;
- osock->sk->sk_data_ready = tc->t_orig_data_ready;
- osock->sk->sk_write_space = tc->t_orig_write_space;
- osock->sk->sk_state_change = tc->t_orig_state_change;
- write_unlock_bh(&osock->sk->sk_callback_lock);
+ rds_tcp_restore_callbacks(osock, tc);
release_sock(osock->sk);
sock_release(osock);
newsock:
- rds_send_reset(conn);
+ rds_send_path_reset(cp);
lock_sock(sock->sk);
- write_lock_bh(&sock->sk->sk_callback_lock);
- tc->t_sock = sock;
- sock->sk->sk_user_data = conn;
- sock->sk->sk_data_ready = rds_tcp_data_ready;
- sock->sk->sk_write_space = rds_tcp_write_space;
- sock->sk->sk_state_change = rds_tcp_state_change;
-
- write_unlock_bh(&sock->sk->sk_callback_lock);
+ rds_tcp_set_callbacks(sock, cp);
release_sock(sock->sk);
}
@@ -202,9 +189,9 @@ newsock:
* above rds_tcp_reset_callbacks for notes about synchronization
* with data path
*/
-void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn)
+void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp)
{
- struct rds_tcp_connection *tc = conn->c_transport_data;
+ struct rds_tcp_connection *tc = cp->cp_transport_data;
rdsdebug("setting sock %p callbacks to tc %p\n", sock, tc);
write_lock_bh(&sock->sk->sk_callback_lock);
@@ -220,12 +207,12 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn)
sock->sk->sk_data_ready = sock->sk->sk_user_data;
tc->t_sock = sock;
- tc->conn = conn;
+ tc->t_cpath = cp;
tc->t_orig_data_ready = sock->sk->sk_data_ready;
tc->t_orig_write_space = sock->sk->sk_write_space;
tc->t_orig_state_change = sock->sk->sk_state_change;
- sock->sk->sk_user_data = conn;
+ sock->sk->sk_user_data = cp;
sock->sk->sk_data_ready = rds_tcp_data_ready;
sock->sk->sk_write_space = rds_tcp_write_space;
sock->sk->sk_state_change = rds_tcp_state_change;
@@ -283,24 +270,29 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr)
static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
{
struct rds_tcp_connection *tc;
+ int i;
- tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
- if (!tc)
- return -ENOMEM;
+ for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+ tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
+ if (!tc)
+ return -ENOMEM;
- mutex_init(&tc->t_conn_lock);
- tc->t_sock = NULL;
- tc->t_tinc = NULL;
- tc->t_tinc_hdr_rem = sizeof(struct rds_header);
- tc->t_tinc_data_rem = 0;
+ mutex_init(&tc->t_conn_path_lock);
+ tc->t_sock = NULL;
+ tc->t_tinc = NULL;
+ tc->t_tinc_hdr_rem = sizeof(struct rds_header);
+ tc->t_tinc_data_rem = 0;
- conn->c_transport_data = tc;
+ conn->c_path[i].cp_transport_data = tc;
+ tc->t_cpath = &conn->c_path[i];
- spin_lock_irq(&rds_tcp_conn_lock);
- list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
- spin_unlock_irq(&rds_tcp_conn_lock);
+ spin_lock_irq(&rds_tcp_conn_lock);
+ list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
+ spin_unlock_irq(&rds_tcp_conn_lock);
+ rdsdebug("rds_conn_path [%d] tc %p\n", i,
+ conn->c_path[i].cp_transport_data);
+ }
- rdsdebug("alloced tc %p\n", conn->c_transport_data);
return 0;
}
@@ -317,6 +309,17 @@ static void rds_tcp_conn_free(void *arg)
kmem_cache_free(rds_tcp_conn_slab, tc);
}
+static bool list_has_conn(struct list_head *list, struct rds_connection *conn)
+{
+ struct rds_tcp_connection *tc, *_tc;
+
+ list_for_each_entry_safe(tc, _tc, list, t_tcp_node) {
+ if (tc->t_cpath->cp_conn == conn)
+ return true;
+ }
+ return false;
+}
+
static void rds_tcp_destroy_conns(void)
{
struct rds_tcp_connection *tc, *_tc;
@@ -324,29 +327,28 @@ static void rds_tcp_destroy_conns(void)
/* avoid calling conn_destroy with irqs off */
spin_lock_irq(&rds_tcp_conn_lock);
- list_splice(&rds_tcp_conn_list, &tmp_list);
- INIT_LIST_HEAD(&rds_tcp_conn_list);
+ list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
+ if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn))
+ list_move_tail(&tc->t_tcp_node, &tmp_list);
+ }
spin_unlock_irq(&rds_tcp_conn_lock);
- list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) {
- if (tc->conn->c_passive)
- rds_conn_destroy(tc->conn->c_passive);
- rds_conn_destroy(tc->conn);
- }
+ list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
+ rds_conn_destroy(tc->t_cpath->cp_conn);
}
static void rds_tcp_exit(void);
struct rds_transport rds_tcp_transport = {
.laddr_check = rds_tcp_laddr_check,
- .xmit_prepare = rds_tcp_xmit_prepare,
- .xmit_complete = rds_tcp_xmit_complete,
+ .xmit_path_prepare = rds_tcp_xmit_path_prepare,
+ .xmit_path_complete = rds_tcp_xmit_path_complete,
.xmit = rds_tcp_xmit,
- .recv = rds_tcp_recv,
+ .recv_path = rds_tcp_recv_path,
.conn_alloc = rds_tcp_conn_alloc,
.conn_free = rds_tcp_conn_free,
- .conn_connect = rds_tcp_conn_connect,
- .conn_shutdown = rds_tcp_conn_shutdown,
+ .conn_path_connect = rds_tcp_conn_path_connect,
+ .conn_path_shutdown = rds_tcp_conn_path_shutdown,
.inc_copy_to_user = rds_tcp_inc_copy_to_user,
.inc_free = rds_tcp_inc_free,
.stats_info_copy = rds_tcp_stats_info_copy,
@@ -355,6 +357,7 @@ struct rds_transport rds_tcp_transport = {
.t_name = "tcp",
.t_type = RDS_TRANS_TCP,
.t_prefer_loopback = 1,
+ .t_mp_capable = 1,
};
static int rds_tcp_netid;
@@ -488,10 +491,30 @@ static struct pernet_operations rds_tcp_net_ops = {
.size = sizeof(struct rds_tcp_net),
};
+/* explicitly send a RST on each socket, thereby releasing any socket refcnts
+ * that may otherwise hold up netns deletion.
+ */
+static void rds_tcp_conn_paths_destroy(struct rds_connection *conn)
+{
+ struct rds_conn_path *cp;
+ struct rds_tcp_connection *tc;
+ int i;
+ struct sock *sk;
+
+ for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+ cp = &conn->c_path[i];
+ tc = cp->cp_transport_data;
+ if (!tc->t_sock)
+ continue;
+ sk = tc->t_sock->sk;
+ sk->sk_prot->disconnect(sk, 0);
+ tcp_done(sk);
+ }
+}
+
static void rds_tcp_kill_sock(struct net *net)
{
struct rds_tcp_connection *tc, *_tc;
- struct sock *sk;
LIST_HEAD(tmp_list);
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
@@ -500,23 +523,27 @@ static void rds_tcp_kill_sock(struct net *net)
flush_work(&rtn->rds_tcp_accept_w);
spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
- struct net *c_net = read_pnet(&tc->conn->c_net);
+ struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
if (net != c_net || !tc->t_sock)
continue;
- list_move_tail(&tc->t_tcp_node, &tmp_list);
+ if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn))
+ list_move_tail(&tc->t_tcp_node, &tmp_list);
}
spin_unlock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) {
- sk = tc->t_sock->sk;
- sk->sk_prot->disconnect(sk, 0);
- tcp_done(sk);
- if (tc->conn->c_passive)
- rds_conn_destroy(tc->conn->c_passive);
- rds_conn_destroy(tc->conn);
+ rds_tcp_conn_paths_destroy(tc->t_cpath->cp_conn);
+ rds_conn_destroy(tc->t_cpath->cp_conn);
}
}
+void *rds_tcp_listen_sock_def_readable(struct net *net)
+{
+ struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+
+ return rtn->rds_tcp_listen_sock->sk->sk_user_data;
+}
+
static int rds_tcp_dev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
@@ -551,12 +578,13 @@ static void rds_tcp_sysctl_reset(struct net *net)
spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
- struct net *c_net = read_pnet(&tc->conn->c_net);
+ struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
if (net != c_net || !tc->t_sock)
continue;
- rds_conn_drop(tc->conn); /* reconnect with new parameters */
+ /* reconnect with new parameters */
+ rds_conn_path_drop(tc->t_cpath);
}
spin_unlock_irq(&rds_tcp_conn_lock);
}