summaryrefslogtreecommitdiffstats
path: root/net/rds/connection.c
diff options
context:
space:
mode:
authorSowmini Varadhan <sowmini.varadhan@oracle.com>2015-09-30 16:54:07 -0400
committerDavid S. Miller <davem@davemloft.net>2015-10-05 03:34:51 -0700
commit3b20fc389705a4c959adebc494578cb99bb8be9e (patch)
treec108cf3b2a4221995ae3b8c7dc91431fc5322b0e /net/rds/connection.c
parent393159e917242c39a5c07173914a0e2a2367ee69 (diff)
downloadlinux-3b20fc389705a4c959adebc494578cb99bb8be9e.tar.gz
linux-3b20fc389705a4c959adebc494578cb99bb8be9e.tar.bz2
linux-3b20fc389705a4c959adebc494578cb99bb8be9e.zip
RDS: Use a single TCP socket for both send and receive.
Commit f711a6ae062c ("net/rds: RDS-TCP: Always create a new rds_sock for an incoming connection.") modified rds-tcp so that an incoming SYN would ignore an existing "client" TCP connection which had the local port set to the transient port. The motivation for ignoring the existing "client" connection in f711a6ae was to avoid race conditions and an endless duel of reconnect attempts triggered by a restart/abort of one of the nodes in the TCP connection. However, having separate sockets for active and passive sides is avoidable, and the simpler model of a single TCP socket for both send and receives of all RDS connections associated with that tcp socket makes for easier observability. We avoid the race conditions from f711a6ae by attempting reconnects in rds_conn_shutdown if, and only if, the (new) c_outgoing bit is set for RDS_TRANS_TCP. The c_outgoing bit is initialized in __rds_conn_create(). A side-effect of re-using the client rds_connection for an incoming SYN is the potential of encountering duelling SYNs, i.e., we have an outgoing RDS_CONN_CONNECTING socket when we get the incoming SYN. The logic to arbitrate this criss-crossing SYN exchange in rds_tcp_accept_one() has been modified to emulate the BGP state machine: the smaller IP address should back off from the connection attempt. Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/rds/connection.c')
-rw-r--r--net/rds/connection.c22
1 files changed, 6 insertions, 16 deletions
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 49adeef8090c..d4564036a339 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -128,10 +128,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
struct rds_transport *loop_trans;
unsigned long flags;
int ret;
- struct rds_transport *otrans = trans;
- if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP)
- goto new_conn;
rcu_read_lock();
conn = rds_conn_lookup(net, head, laddr, faddr, trans);
if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport &&
@@ -147,7 +144,6 @@ static struct rds_connection *__rds_conn_create(struct net *net,
if (conn)
goto out;
-new_conn:
conn = kmem_cache_zalloc(rds_conn_slab, gfp);
if (!conn) {
conn = ERR_PTR(-ENOMEM);
@@ -207,6 +203,7 @@ new_conn:
atomic_set(&conn->c_state, RDS_CONN_DOWN);
conn->c_send_gen = 0;
+ conn->c_outgoing = (is_outgoing ? 1 : 0);
conn->c_reconnect_jiffies = 0;
INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker);
INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker);
@@ -243,22 +240,13 @@ new_conn:
/* Creating normal conn */
struct rds_connection *found;
- if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP)
- found = NULL;
- else
- found = rds_conn_lookup(net, head, laddr, faddr, trans);
+ found = rds_conn_lookup(net, head, laddr, faddr, trans);
if (found) {
trans->conn_free(conn->c_transport_data);
kmem_cache_free(rds_conn_slab, conn);
conn = found;
} else {
- if ((is_outgoing && otrans->t_type == RDS_TRANS_TCP) ||
- (otrans->t_type != RDS_TRANS_TCP)) {
- /* Only the active side should be added to
- * reconnect list for TCP.
- */
- hlist_add_head_rcu(&conn->c_hash_node, head);
- }
+ hlist_add_head_rcu(&conn->c_hash_node, head);
rds_cong_add_conn(conn);
rds_conn_count++;
}
@@ -337,7 +325,9 @@ void rds_conn_shutdown(struct rds_connection *conn)
rcu_read_lock();
if (!hlist_unhashed(&conn->c_hash_node)) {
rcu_read_unlock();
- rds_queue_reconnect(conn);
+ if (conn->c_trans->t_type != RDS_TRANS_TCP ||
+ conn->c_outgoing == 1)
+ rds_queue_reconnect(conn);
} else {
rcu_read_unlock();
}