diff options
Diffstat (limited to 'net/rds')
-rw-r--r-- | net/rds/bind.c | 1 | ||||
-rw-r--r-- | net/rds/cong.c | 10 | ||||
-rw-r--r-- | net/rds/connection.c | 27 | ||||
-rw-r--r-- | net/rds/rds.h | 10 | ||||
-rw-r--r-- | net/rds/send.c | 37 | ||||
-rw-r--r-- | net/rds/tcp.c | 81 | ||||
-rw-r--r-- | net/rds/tcp.h | 1 | ||||
-rw-r--r-- | net/rds/tcp_connect.c | 2 | ||||
-rw-r--r-- | net/rds/tcp_recv.c | 8 | ||||
-rw-r--r-- | net/rds/tcp_send.c | 5 | ||||
-rw-r--r-- | net/rds/threads.c | 20 |
11 files changed, 128 insertions, 74 deletions
diff --git a/net/rds/bind.c b/net/rds/bind.c index 75d43dc8e96b..5aa3a64aa4f0 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -114,6 +114,7 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port) rs, &addr, (int)ntohs(*port)); break; } else { + rs->rs_bound_addr = 0; rds_sock_put(rs); ret = -ENOMEM; break; diff --git a/net/rds/cong.c b/net/rds/cong.c index 8398fee7c866..8d19fd25dce3 100644 --- a/net/rds/cong.c +++ b/net/rds/cong.c @@ -219,7 +219,11 @@ void rds_cong_queue_updates(struct rds_cong_map *map) spin_lock_irqsave(&rds_cong_lock, flags); list_for_each_entry(conn, &map->m_conn_list, c_map_item) { - if (!test_and_set_bit(0, &conn->c_map_queued)) { + struct rds_conn_path *cp = &conn->c_path[0]; + + rcu_read_lock(); + if (!test_and_set_bit(0, &conn->c_map_queued) && + !test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) { rds_stats_inc(s_cong_update_queued); /* We cannot inline the call to rds_send_xmit() here * for two reasons (both pertaining to a TCP transport): @@ -235,9 +239,9 @@ void rds_cong_queue_updates(struct rds_cong_map *map) * therefore trigger warnings. * Defer the xmit to rds_send_worker() instead. */ - queue_delayed_work(rds_wq, - &conn->c_path[0].cp_send_w, 0); + queue_delayed_work(rds_wq, &cp->cp_send_w, 0); } + rcu_read_unlock(); } spin_unlock_irqrestore(&rds_cong_lock, flags); diff --git a/net/rds/connection.c b/net/rds/connection.c index 7ee2d5d68b78..b10c0ef36d8d 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -230,8 +230,8 @@ static struct rds_connection *__rds_conn_create(struct net *net, rdsdebug("allocated conn %p for %pI4 -> %pI4 over %s %s\n", conn, &laddr, &faddr, - trans->t_name ? trans->t_name : "[unknown]", - is_outgoing ? "(outgoing)" : ""); + strnlen(trans->t_name, sizeof(trans->t_name)) ? trans->t_name : + "[unknown]", is_outgoing ? "(outgoing)" : ""); /* * Since we ran without holding the conn lock, someone could @@ -382,10 +382,13 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp) { struct rds_message *rm, *rtmp; + set_bit(RDS_DESTROY_PENDING, &cp->cp_flags); + if (!cp->cp_transport_data) return; /* make sure lingering queued work won't try to ref the conn */ + synchronize_rcu(); cancel_delayed_work_sync(&cp->cp_send_w); cancel_delayed_work_sync(&cp->cp_recv_w); @@ -403,6 +406,11 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp) if (cp->cp_xmit_rm) rds_message_put(cp->cp_xmit_rm); + WARN_ON(delayed_work_pending(&cp->cp_send_w)); + WARN_ON(delayed_work_pending(&cp->cp_recv_w)); + WARN_ON(delayed_work_pending(&cp->cp_conn_w)); + WARN_ON(work_pending(&cp->cp_down_w)); + cp->cp_conn->c_trans->conn_free(cp->cp_transport_data); } @@ -424,7 +432,6 @@ void rds_conn_destroy(struct rds_connection *conn) "%pI4\n", conn, &conn->c_laddr, &conn->c_faddr); - conn->c_destroy_in_prog = 1; /* Ensure conn will not be scheduled for reconnect */ spin_lock_irq(&rds_conn_lock); hlist_del_init_rcu(&conn->c_hash_node); @@ -445,7 +452,6 @@ void rds_conn_destroy(struct rds_connection *conn) */ rds_cong_remove_conn(conn); - put_net(conn->c_net); kfree(conn->c_path); kmem_cache_free(rds_conn_slab, conn); @@ -684,10 +690,13 @@ void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy) { atomic_set(&cp->cp_state, RDS_CONN_ERROR); - if (!destroy && cp->cp_conn->c_destroy_in_prog) + rcu_read_lock(); + if (!destroy && test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) { + rcu_read_unlock(); return; - + } queue_work(rds_wq, &cp->cp_down_w); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(rds_conn_path_drop); @@ -704,9 +713,15 @@ EXPORT_SYMBOL_GPL(rds_conn_drop); */ void rds_conn_path_connect_if_down(struct rds_conn_path *cp) { + rcu_read_lock(); + if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) { + rcu_read_unlock(); + return; + } if (rds_conn_path_state(cp) == RDS_CONN_DOWN && !test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags)) queue_delayed_work(rds_wq, &cp->cp_conn_w, 0); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down); diff --git a/net/rds/rds.h b/net/rds/rds.h index c349c71babff..374ae83b60d4 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -88,6 +88,7 @@ enum { #define RDS_RECONNECT_PENDING 1 #define RDS_IN_XMIT 2 #define RDS_RECV_REFILL 3 +#define RDS_DESTROY_PENDING 4 /* Max number of multipaths per RDS connection. Must be a power of 2 */ #define RDS_MPATH_WORKERS 8 @@ -139,8 +140,7 @@ struct rds_connection { __be32 c_faddr; unsigned int c_loopback:1, c_ping_triggered:1, - c_destroy_in_prog:1, - c_pad_to_32:29; + c_pad_to_32:30; int c_npaths; struct rds_connection *c_passive; struct rds_transport *c_trans; @@ -150,7 +150,7 @@ struct rds_connection { /* Protocol version */ unsigned int c_version; - struct net *c_net; + possible_net_t c_net; struct list_head c_map_item; unsigned long c_map_queued; @@ -165,13 +165,13 @@ struct rds_connection { static inline struct net *rds_conn_net(struct rds_connection *conn) { - return conn->c_net; + return read_pnet(&conn->c_net); } static inline void rds_conn_net_set(struct rds_connection *conn, struct net *net) { - conn->c_net = get_net(net); + write_pnet(&conn->c_net, net); } #define RDS_FLAG_CONG_BITMAP 0x01 diff --git a/net/rds/send.c b/net/rds/send.c index f72466c63f0c..d3e32d1f3c7d 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -162,6 +162,12 @@ restart: goto out; } + if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) { + release_in_xmit(cp); + ret = -ENETUNREACH; /* dont requeue send work */ + goto out; + } + /* * we record the send generation after doing the xmit acquire. * if someone else manages to jump in and do some work, we'll use @@ -437,7 +443,12 @@ over_batch: !list_empty(&cp->cp_send_queue)) && !raced) { if (batch_count < send_batch_count) goto restart; - queue_delayed_work(rds_wq, &cp->cp_send_w, 1); + rcu_read_lock(); + if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) + ret = -ENETUNREACH; + else + queue_delayed_work(rds_wq, &cp->cp_send_w, 1); + rcu_read_unlock(); } else if (raced) { rds_stats_inc(s_send_lock_queue_raced); } @@ -1151,6 +1162,11 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) else cpath = &conn->c_path[0]; + if (test_bit(RDS_DESTROY_PENDING, &cpath->cp_flags)) { + ret = -EAGAIN; + goto out; + } + rds_conn_path_connect_if_down(cpath); ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs); @@ -1190,9 +1206,17 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) rds_stats_inc(s_send_queued); ret = rds_send_xmit(cpath); - if (ret == -ENOMEM || ret == -EAGAIN) - queue_delayed_work(rds_wq, &cpath->cp_send_w, 1); - + if (ret == -ENOMEM || ret == -EAGAIN) { + ret = 0; + rcu_read_lock(); + if (test_bit(RDS_DESTROY_PENDING, &cpath->cp_flags)) + ret = -ENETUNREACH; + else + queue_delayed_work(rds_wq, &cpath->cp_send_w, 1); + rcu_read_unlock(); + } + if (ret) + goto out; rds_message_put(rm); return payload_len; @@ -1270,7 +1294,10 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport, rds_stats_inc(s_send_pong); /* schedule the send work on rds_wq */ - queue_delayed_work(rds_wq, &cp->cp_send_w, 1); + rcu_read_lock(); + if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) + queue_delayed_work(rds_wq, &cp->cp_send_w, 1); + rcu_read_unlock(); rds_message_put(rm); return 0; diff --git a/net/rds/tcp.c b/net/rds/tcp.c index ab7356e0ba83..9920d2f84eff 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -271,16 +271,33 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr) return -EADDRNOTAVAIL; } +static void rds_tcp_conn_free(void *arg) +{ + struct rds_tcp_connection *tc = arg; + unsigned long flags; + + rdsdebug("freeing tc %p\n", tc); + + spin_lock_irqsave(&rds_tcp_conn_lock, flags); + if (!tc->t_tcp_node_detached) + list_del(&tc->t_tcp_node); + spin_unlock_irqrestore(&rds_tcp_conn_lock, flags); + + kmem_cache_free(rds_tcp_conn_slab, tc); +} + static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp) { struct rds_tcp_connection *tc; - int i; + int i, j; + int ret = 0; for (i = 0; i < RDS_MPATH_WORKERS; i++) { tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp); - if (!tc) - return -ENOMEM; - + if (!tc) { + ret = -ENOMEM; + break; + } mutex_init(&tc->t_conn_path_lock); tc->t_sock = NULL; tc->t_tinc = NULL; @@ -291,26 +308,17 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp) tc->t_cpath = &conn->c_path[i]; spin_lock_irq(&rds_tcp_conn_lock); + tc->t_tcp_node_detached = false; list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list); spin_unlock_irq(&rds_tcp_conn_lock); rdsdebug("rds_conn_path [%d] tc %p\n", i, conn->c_path[i].cp_transport_data); } - - return 0; -} - -static void rds_tcp_conn_free(void *arg) -{ - struct rds_tcp_connection *tc = arg; - unsigned long flags; - rdsdebug("freeing tc %p\n", tc); - - spin_lock_irqsave(&rds_tcp_conn_lock, flags); - list_del(&tc->t_tcp_node); - spin_unlock_irqrestore(&rds_tcp_conn_lock, flags); - - kmem_cache_free(rds_tcp_conn_slab, tc); + if (ret) { + for (j = 0; j < i; j++) + rds_tcp_conn_free(conn->c_path[j].cp_transport_data); + } + return ret; } static bool list_has_conn(struct list_head *list, struct rds_connection *conn) @@ -496,27 +504,6 @@ static struct pernet_operations rds_tcp_net_ops = { .size = sizeof(struct rds_tcp_net), }; -/* explicitly send a RST on each socket, thereby releasing any socket refcnts - * that may otherwise hold up netns deletion. - */ -static void rds_tcp_conn_paths_destroy(struct rds_connection *conn) -{ - struct rds_conn_path *cp; - struct rds_tcp_connection *tc; - int i; - struct sock *sk; - - for (i = 0; i < RDS_MPATH_WORKERS; i++) { - cp = &conn->c_path[i]; - tc = cp->cp_transport_data; - if (!tc->t_sock) - continue; - sk = tc->t_sock->sk; - sk->sk_prot->disconnect(sk, 0); - tcp_done(sk); - } -} - static void rds_tcp_kill_sock(struct net *net) { struct rds_tcp_connection *tc, *_tc; @@ -528,18 +515,20 @@ static void rds_tcp_kill_sock(struct net *net) rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { - struct net *c_net = tc->t_cpath->cp_conn->c_net; + struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); if (net != c_net || !tc->t_sock) continue; - if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) + if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) { list_move_tail(&tc->t_tcp_node, &tmp_list); + } else { + list_del(&tc->t_tcp_node); + tc->t_tcp_node_detached = true; + } } spin_unlock_irq(&rds_tcp_conn_lock); - list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) { - rds_tcp_conn_paths_destroy(tc->t_cpath->cp_conn); + list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) rds_conn_destroy(tc->t_cpath->cp_conn); - } } void *rds_tcp_listen_sock_def_readable(struct net *net) @@ -587,7 +576,7 @@ static void rds_tcp_sysctl_reset(struct net *net) spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { - struct net *c_net = tc->t_cpath->cp_conn->c_net; + struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); if (net != c_net || !tc->t_sock) continue; diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 864ca7d8f019..c6fa080e9b6d 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -12,6 +12,7 @@ struct rds_tcp_incoming { struct rds_tcp_connection { struct list_head t_tcp_node; + bool t_tcp_node_detached; struct rds_conn_path *t_cpath; /* t_conn_path_lock synchronizes the connection establishment between * rds_tcp_accept_one and rds_tcp_conn_path_connect diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 46f74dad0e16..534c67aeb20f 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -170,7 +170,7 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp) cp->cp_conn, tc, sock); if (sock) { - if (cp->cp_conn->c_destroy_in_prog) + if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) rds_tcp_set_linger(sock); sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN); lock_sock(sock->sk); diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index e006ef8e6d40..dd707b9e73e5 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -321,8 +321,12 @@ void rds_tcp_data_ready(struct sock *sk) ready = tc->t_orig_data_ready; rds_tcp_stats_inc(s_tcp_data_ready_calls); - if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM) - queue_delayed_work(rds_wq, &cp->cp_recv_w, 0); + if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM) { + rcu_read_lock(); + if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) + queue_delayed_work(rds_wq, &cp->cp_recv_w, 0); + rcu_read_unlock(); + } out: read_unlock_bh(&sk->sk_callback_lock); ready(sk); diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 9b76e0fa1722..16f65744d984 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -202,8 +202,11 @@ void rds_tcp_write_space(struct sock *sk) tc->t_last_seen_una = rds_tcp_snd_una(tc); rds_send_path_drop_acked(cp, rds_tcp_snd_una(tc), rds_tcp_is_acked); - if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) + rcu_read_lock(); + if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf && + !test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) queue_delayed_work(rds_wq, &cp->cp_send_w, 0); + rcu_read_unlock(); out: read_unlock_bh(&sk->sk_callback_lock); diff --git a/net/rds/threads.c b/net/rds/threads.c index f121daa402c8..eb76db1360b0 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -87,8 +87,12 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr) cp->cp_reconnect_jiffies = 0; set_bit(0, &cp->cp_conn->c_map_queued); - queue_delayed_work(rds_wq, &cp->cp_send_w, 0); - queue_delayed_work(rds_wq, &cp->cp_recv_w, 0); + rcu_read_lock(); + if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) { + queue_delayed_work(rds_wq, &cp->cp_send_w, 0); + queue_delayed_work(rds_wq, &cp->cp_recv_w, 0); + } + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(rds_connect_path_complete); @@ -133,7 +137,10 @@ void rds_queue_reconnect(struct rds_conn_path *cp) set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); if (cp->cp_reconnect_jiffies == 0) { cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies; - queue_delayed_work(rds_wq, &cp->cp_conn_w, 0); + rcu_read_lock(); + if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) + queue_delayed_work(rds_wq, &cp->cp_conn_w, 0); + rcu_read_unlock(); return; } @@ -141,8 +148,11 @@ void rds_queue_reconnect(struct rds_conn_path *cp) rdsdebug("%lu delay %lu ceil conn %p for %pI4 -> %pI4\n", rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies, conn, &conn->c_laddr, &conn->c_faddr); - queue_delayed_work(rds_wq, &cp->cp_conn_w, - rand % cp->cp_reconnect_jiffies); + rcu_read_lock(); + if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) + queue_delayed_work(rds_wq, &cp->cp_conn_w, + rand % cp->cp_reconnect_jiffies); + rcu_read_unlock(); cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2, rds_sysctl_reconnect_max_jiffies); |