summary refs log tree commit diff stats
path: root/net/rds
diff options
context:
space:
mode:
Diffstat (limited to 'net/rds')
-rw-r--r-- net/rds/bind.c 1
-rw-r--r-- net/rds/cong.c 10
-rw-r--r-- net/rds/connection.c 27
-rw-r--r-- net/rds/rds.h 10
-rw-r--r-- net/rds/send.c 37
-rw-r--r-- net/rds/tcp.c 81
-rw-r--r-- net/rds/tcp.h 1
-rw-r--r-- net/rds/tcp_connect.c 2
-rw-r--r-- net/rds/tcp_recv.c 8
-rw-r--r-- net/rds/tcp_send.c 5
-rw-r--r-- net/rds/threads.c 20
11 files changed, 128 insertions, 74 deletions
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 75d43dc8e96b..5aa3a64aa4f0 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -114,6 +114,7 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
rs, &addr, (int)ntohs(*port));
break;
} else {
+ rs->rs_bound_addr = 0;
rds_sock_put(rs);
ret = -ENOMEM;
break;
diff --git a/net/rds/cong.c b/net/rds/cong.c
index 8398fee7c866..8d19fd25dce3 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -219,7 +219,11 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
spin_lock_irqsave(&rds_cong_lock, flags);
list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
- if (!test_and_set_bit(0, &conn->c_map_queued)) {
+ struct rds_conn_path *cp = &conn->c_path[0];
+
+ rcu_read_lock();
+ if (!test_and_set_bit(0, &conn->c_map_queued) &&
+ !test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
rds_stats_inc(s_cong_update_queued);
/* We cannot inline the call to rds_send_xmit() here
* for two reasons (both pertaining to a TCP transport):
@@ -235,9 +239,9 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
* therefore trigger warnings.
* Defer the xmit to rds_send_worker() instead.
*/
- queue_delayed_work(rds_wq,
- &conn->c_path[0].cp_send_w, 0);
+ queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
}
+ rcu_read_unlock();
}
spin_unlock_irqrestore(&rds_cong_lock, flags);
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 7ee2d5d68b78..b10c0ef36d8d 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -230,8 +230,8 @@ static struct rds_connection *__rds_conn_create(struct net *net,
rdsdebug("allocated conn %p for %pI4 -> %pI4 over %s %s\n",
conn, &laddr, &faddr,
- trans->t_name ? trans->t_name : "[unknown]",
- is_outgoing ? "(outgoing)" : "");
+ strnlen(trans->t_name, sizeof(trans->t_name)) ? trans->t_name :
+ "[unknown]", is_outgoing ? "(outgoing)" : "");
/*
* Since we ran without holding the conn lock, someone could
@@ -382,10 +382,13 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
{
struct rds_message *rm, *rtmp;
+ set_bit(RDS_DESTROY_PENDING, &cp->cp_flags);
+
if (!cp->cp_transport_data)
return;
/* make sure lingering queued work won't try to ref the conn */
+ synchronize_rcu();
cancel_delayed_work_sync(&cp->cp_send_w);
cancel_delayed_work_sync(&cp->cp_recv_w);
@@ -403,6 +406,11 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
if (cp->cp_xmit_rm)
rds_message_put(cp->cp_xmit_rm);
+ WARN_ON(delayed_work_pending(&cp->cp_send_w));
+ WARN_ON(delayed_work_pending(&cp->cp_recv_w));
+ WARN_ON(delayed_work_pending(&cp->cp_conn_w));
+ WARN_ON(work_pending(&cp->cp_down_w));
+
cp->cp_conn->c_trans->conn_free(cp->cp_transport_data);
}
@@ -424,7 +432,6 @@ void rds_conn_destroy(struct rds_connection *conn)
"%pI4\n", conn, &conn->c_laddr,
&conn->c_faddr);
- conn->c_destroy_in_prog = 1;
/* Ensure conn will not be scheduled for reconnect */
spin_lock_irq(&rds_conn_lock);
hlist_del_init_rcu(&conn->c_hash_node);
@@ -445,7 +452,6 @@ void rds_conn_destroy(struct rds_connection *conn)
*/
rds_cong_remove_conn(conn);
- put_net(conn->c_net);
kfree(conn->c_path);
kmem_cache_free(rds_conn_slab, conn);
@@ -684,10 +690,13 @@ void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy)
{
atomic_set(&cp->cp_state, RDS_CONN_ERROR);
- if (!destroy && cp->cp_conn->c_destroy_in_prog)
+ rcu_read_lock();
+ if (!destroy && test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
+ rcu_read_unlock();
return;
-
+ }
queue_work(rds_wq, &cp->cp_down_w);
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rds_conn_path_drop);
@@ -704,9 +713,15 @@ EXPORT_SYMBOL_GPL(rds_conn_drop);
*/
void rds_conn_path_connect_if_down(struct rds_conn_path *cp)
{
+ rcu_read_lock();
+ if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
+ rcu_read_unlock();
+ return;
+ }
if (rds_conn_path_state(cp) == RDS_CONN_DOWN &&
!test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags))
queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down);
diff --git a/net/rds/rds.h b/net/rds/rds.h
index c349c71babff..374ae83b60d4 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -88,6 +88,7 @@ enum {
#define RDS_RECONNECT_PENDING 1
#define RDS_IN_XMIT 2
#define RDS_RECV_REFILL 3
+#define RDS_DESTROY_PENDING 4
/* Max number of multipaths per RDS connection. Must be a power of 2 */
#define RDS_MPATH_WORKERS 8
@@ -139,8 +140,7 @@ struct rds_connection {
__be32 c_faddr;
unsigned int c_loopback:1,
c_ping_triggered:1,
- c_destroy_in_prog:1,
- c_pad_to_32:29;
+ c_pad_to_32:30;
int c_npaths;
struct rds_connection *c_passive;
struct rds_transport *c_trans;
@@ -150,7 +150,7 @@ struct rds_connection {
/* Protocol version */
unsigned int c_version;
- struct net *c_net;
+ possible_net_t c_net;
struct list_head c_map_item;
unsigned long c_map_queued;
@@ -165,13 +165,13 @@ struct rds_connection {
static inline
struct net *rds_conn_net(struct rds_connection *conn)
{
- return conn->c_net;
+ return read_pnet(&conn->c_net);
}
static inline
void rds_conn_net_set(struct rds_connection *conn, struct net *net)
{
- conn->c_net = get_net(net);
+ write_pnet(&conn->c_net, net);
}
#define RDS_FLAG_CONG_BITMAP 0x01
diff --git a/net/rds/send.c b/net/rds/send.c
index f72466c63f0c..d3e32d1f3c7d 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -162,6 +162,12 @@ restart:
goto out;
}
+ if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
+ release_in_xmit(cp);
+ ret = -ENETUNREACH; /* dont requeue send work */
+ goto out;
+ }
+
/*
* we record the send generation after doing the xmit acquire.
* if someone else manages to jump in and do some work, we'll use
@@ -437,7 +443,12 @@ over_batch:
!list_empty(&cp->cp_send_queue)) && !raced) {
if (batch_count < send_batch_count)
goto restart;
- queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
+ rcu_read_lock();
+ if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+ ret = -ENETUNREACH;
+ else
+ queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
+ rcu_read_unlock();
} else if (raced) {
rds_stats_inc(s_send_lock_queue_raced);
}
@@ -1151,6 +1162,11 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
else
cpath = &conn->c_path[0];
+ if (test_bit(RDS_DESTROY_PENDING, &cpath->cp_flags)) {
+ ret = -EAGAIN;
+ goto out;
+ }
+
rds_conn_path_connect_if_down(cpath);
ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs);
@@ -1190,9 +1206,17 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
rds_stats_inc(s_send_queued);
ret = rds_send_xmit(cpath);
- if (ret == -ENOMEM || ret == -EAGAIN)
- queue_delayed_work(rds_wq, &cpath->cp_send_w, 1);
-
+ if (ret == -ENOMEM || ret == -EAGAIN) {
+ ret = 0;
+ rcu_read_lock();
+ if (test_bit(RDS_DESTROY_PENDING, &cpath->cp_flags))
+ ret = -ENETUNREACH;
+ else
+ queue_delayed_work(rds_wq, &cpath->cp_send_w, 1);
+ rcu_read_unlock();
+ }
+ if (ret)
+ goto out;
rds_message_put(rm);
return payload_len;
@@ -1270,7 +1294,10 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport,
rds_stats_inc(s_send_pong);
/* schedule the send work on rds_wq */
- queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
+ rcu_read_lock();
+ if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+ queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
+ rcu_read_unlock();
rds_message_put(rm);
return 0;
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index ab7356e0ba83..9920d2f84eff 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -271,16 +271,33 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr)
return -EADDRNOTAVAIL;
}
+static void rds_tcp_conn_free(void *arg)
+{
+ struct rds_tcp_connection *tc = arg;
+ unsigned long flags;
+
+ rdsdebug("freeing tc %p\n", tc);
+
+ spin_lock_irqsave(&rds_tcp_conn_lock, flags);
+ if (!tc->t_tcp_node_detached)
+ list_del(&tc->t_tcp_node);
+ spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
+
+ kmem_cache_free(rds_tcp_conn_slab, tc);
+}
+
static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
{
struct rds_tcp_connection *tc;
- int i;
+ int i, j;
+ int ret = 0;
for (i = 0; i < RDS_MPATH_WORKERS; i++) {
tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
- if (!tc)
- return -ENOMEM;
-
+ if (!tc) {
+ ret = -ENOMEM;
+ break;
+ }
mutex_init(&tc->t_conn_path_lock);
tc->t_sock = NULL;
tc->t_tinc = NULL;
@@ -291,26 +308,17 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
tc->t_cpath = &conn->c_path[i];
spin_lock_irq(&rds_tcp_conn_lock);
+ tc->t_tcp_node_detached = false;
list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
spin_unlock_irq(&rds_tcp_conn_lock);
rdsdebug("rds_conn_path [%d] tc %p\n", i,
conn->c_path[i].cp_transport_data);
}
-
- return 0;
-}
-
-static void rds_tcp_conn_free(void *arg)
-{
- struct rds_tcp_connection *tc = arg;
- unsigned long flags;
- rdsdebug("freeing tc %p\n", tc);
-
- spin_lock_irqsave(&rds_tcp_conn_lock, flags);
- list_del(&tc->t_tcp_node);
- spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
-
- kmem_cache_free(rds_tcp_conn_slab, tc);
+ if (ret) {
+ for (j = 0; j < i; j++)
+ rds_tcp_conn_free(conn->c_path[j].cp_transport_data);
+ }
+ return ret;
}
static bool list_has_conn(struct list_head *list, struct rds_connection *conn)
@@ -496,27 +504,6 @@ static struct pernet_operations rds_tcp_net_ops = {
.size = sizeof(struct rds_tcp_net),
};
-/* explicitly send a RST on each socket, thereby releasing any socket refcnts
- * that may otherwise hold up netns deletion.
- */
-static void rds_tcp_conn_paths_destroy(struct rds_connection *conn)
-{
- struct rds_conn_path *cp;
- struct rds_tcp_connection *tc;
- int i;
- struct sock *sk;
-
- for (i = 0; i < RDS_MPATH_WORKERS; i++) {
- cp = &conn->c_path[i];
- tc = cp->cp_transport_data;
- if (!tc->t_sock)
- continue;
- sk = tc->t_sock->sk;
- sk->sk_prot->disconnect(sk, 0);
- tcp_done(sk);
- }
-}
-
static void rds_tcp_kill_sock(struct net *net)
{
struct rds_tcp_connection *tc, *_tc;
@@ -528,18 +515,20 @@ static void rds_tcp_kill_sock(struct net *net)
rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
- struct net *c_net = tc->t_cpath->cp_conn->c_net;
+ struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
if (net != c_net || !tc->t_sock)
continue;
- if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn))
+ if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) {
list_move_tail(&tc->t_tcp_node, &tmp_list);
+ } else {
+ list_del(&tc->t_tcp_node);
+ tc->t_tcp_node_detached = true;
+ }
}
spin_unlock_irq(&rds_tcp_conn_lock);
- list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) {
- rds_tcp_conn_paths_destroy(tc->t_cpath->cp_conn);
+ list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
rds_conn_destroy(tc->t_cpath->cp_conn);
- }
}
void *rds_tcp_listen_sock_def_readable(struct net *net)
@@ -587,7 +576,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
- struct net *c_net = tc->t_cpath->cp_conn->c_net;
+ struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
if (net != c_net || !tc->t_sock)
continue;
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 864ca7d8f019..c6fa080e9b6d 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -12,6 +12,7 @@ struct rds_tcp_incoming {
struct rds_tcp_connection {
struct list_head t_tcp_node;
+ bool t_tcp_node_detached;
struct rds_conn_path *t_cpath;
/* t_conn_path_lock synchronizes the connection establishment between
* rds_tcp_accept_one and rds_tcp_conn_path_connect
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 46f74dad0e16..534c67aeb20f 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -170,7 +170,7 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp)
cp->cp_conn, tc, sock);
if (sock) {
- if (cp->cp_conn->c_destroy_in_prog)
+ if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
rds_tcp_set_linger(sock);
sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN);
lock_sock(sock->sk);
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index e006ef8e6d40..dd707b9e73e5 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -321,8 +321,12 @@ void rds_tcp_data_ready(struct sock *sk)
ready = tc->t_orig_data_ready;
rds_tcp_stats_inc(s_tcp_data_ready_calls);
- if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM)
- queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+ if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM) {
+ rcu_read_lock();
+ if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+ queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+ rcu_read_unlock();
+ }
out:
read_unlock_bh(&sk->sk_callback_lock);
ready(sk);
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 9b76e0fa1722..16f65744d984 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -202,8 +202,11 @@ void rds_tcp_write_space(struct sock *sk)
tc->t_last_seen_una = rds_tcp_snd_una(tc);
rds_send_path_drop_acked(cp, rds_tcp_snd_una(tc), rds_tcp_is_acked);
- if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
+ rcu_read_lock();
+ if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf &&
+ !test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
+ rcu_read_unlock();
out:
read_unlock_bh(&sk->sk_callback_lock);
diff --git a/net/rds/threads.c b/net/rds/threads.c
index f121daa402c8..eb76db1360b0 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -87,8 +87,12 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
cp->cp_reconnect_jiffies = 0;
set_bit(0, &cp->cp_conn->c_map_queued);
- queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
- queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+ rcu_read_lock();
+ if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
+ queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
+ queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+ }
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rds_connect_path_complete);
@@ -133,7 +137,10 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
if (cp->cp_reconnect_jiffies == 0) {
cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
- queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
+ rcu_read_lock();
+ if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+ queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
+ rcu_read_unlock();
return;
}
@@ -141,8 +148,11 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
rdsdebug("%lu delay %lu ceil conn %p for %pI4 -> %pI4\n",
rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies,
conn, &conn->c_laddr, &conn->c_faddr);
- queue_delayed_work(rds_wq, &cp->cp_conn_w,
- rand % cp->cp_reconnect_jiffies);
+ rcu_read_lock();
+ if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+ queue_delayed_work(rds_wq, &cp->cp_conn_w,
+ rand % cp->cp_reconnect_jiffies);
+ rcu_read_unlock();
cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2,
rds_sysctl_reconnect_max_jiffies);