summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWillem de Bruijn <willemb@google.com>2019-09-12 21:16:39 -0400
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2019-09-21 07:16:43 +0200
commitfdd60d80c4294b7203d6f9d075a57da0a8d85fba (patch)
tree88b4a9c352f80f3793007c5e76bb487867784b30
parent97b5f8c95b75817f0ab27438e8dab136a3ae9ae8 (diff)
downloadlinux-stable-fdd60d80c4294b7203d6f9d075a57da0a8d85fba.tar.gz
linux-stable-fdd60d80c4294b7203d6f9d075a57da0a8d85fba.tar.bz2
linux-stable-fdd60d80c4294b7203d6f9d075a57da0a8d85fba.zip
udp: correct reuseport selection with connected sockets
[ Upstream commit acdcecc61285faed359f1a3568c32089cc3a8329 ] UDP reuseport groups can hold a mix of unconnected and connected sockets. Ensure that connections only receive all traffic to their 4-tuple. Fast reuseport returns on the first reuseport match on the assumption that all matches are equal. Only if connections are present, return to the previous behavior of scoring all sockets. Record if connections are present and if so (1) treat such connected sockets as an independent match from the group, (2) only return 2-tuple matches from reuseport and (3) do not return on the first 2-tuple reuseport match to allow for a higher scoring match later. New field has_conns is set without locks. No other fields in the bitmap are modified at runtime and the field is only ever set unconditionally, so an RMW cannot miss a change. Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection") Link: http://lkml.kernel.org/r/CA+FuTSfRP09aJNYRt04SS6qj22ViiOEWaWmLAwX0psk8-PGNxw@mail.gmail.com Signed-off-by: Willem de Bruijn <willemb@google.com> Acked-by: Paolo Abeni <pabeni@redhat.com> Acked-by: Craig Gallek <kraig@google.com> Signed-off-by: Willem de Bruijn <willemb@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--include/net/sock_reuseport.h21
-rw-r--r--net/core/sock_reuseport.c15
-rw-r--r--net/ipv4/datagram.c2
-rw-r--r--net/ipv4/udp.c5
-rw-r--r--net/ipv6/datagram.c2
-rw-r--r--net/ipv6/udp.c5
6 files changed, 43 insertions, 7 deletions
diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 8a5f70c7cdf2..5e69fba181bc 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -21,7 +21,8 @@ struct sock_reuseport {
unsigned int synq_overflow_ts;
/* ID stays the same even after the size of socks[] grows. */
unsigned int reuseport_id;
- bool bind_inany;
+ unsigned int bind_inany:1;
+ unsigned int has_conns:1;
struct bpf_prog __rcu *prog; /* optional BPF sock selector */
struct sock *socks[0]; /* array of sock pointers */
};
@@ -35,6 +36,24 @@ extern struct sock *reuseport_select_sock(struct sock *sk,
struct sk_buff *skb,
int hdr_len);
extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
+
+static inline bool reuseport_has_conns(struct sock *sk, bool set)
+{
+ struct sock_reuseport *reuse;
+ bool ret = false;
+
+ rcu_read_lock();
+ reuse = rcu_dereference(sk->sk_reuseport_cb);
+ if (reuse) {
+ if (set)
+ reuse->has_conns = 1;
+ ret = reuse->has_conns;
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+
int reuseport_get_id(struct sock_reuseport *reuse);
#endif /* _SOCK_REUSEPORT_H */
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index ba5cba56f574..fd38cf1d2b02 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -292,8 +292,19 @@ struct sock *reuseport_select_sock(struct sock *sk,
select_by_hash:
/* no bpf or invalid bpf result: fall back to hash usage */
- if (!sk2)
- sk2 = reuse->socks[reciprocal_scale(hash, socks)];
+ if (!sk2) {
+ int i, j;
+
+ i = j = reciprocal_scale(hash, socks);
+ while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
+ i++;
+ if (i >= reuse->num_socks)
+ i = 0;
+ if (i == j)
+ goto out;
+ }
+ sk2 = reuse->socks[i];
+ }
}
out:
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index f915abff1350..80107a6a2c4a 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -19,6 +19,7 @@
#include <net/sock.h>
#include <net/route.h>
#include <net/tcp_states.h>
+#include <net/sock_reuseport.h>
int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
@@ -73,6 +74,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
}
inet->inet_daddr = fl4->daddr;
inet->inet_dport = usin->sin_port;
+ reuseport_has_conns(sk, true);
sk->sk_state = TCP_ESTABLISHED;
sk_set_txhash(sk);
inet->inet_id = jiffies;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6ab68b06fa39..2085fc0046de 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -443,12 +443,13 @@ static struct sock *udp4_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif, exact_dif);
if (score > badness) {
- if (sk->sk_reuseport) {
+ if (sk->sk_reuseport &&
+ sk->sk_state != TCP_ESTABLISHED) {
hash = udp_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
- if (result)
+ if (result && !reuseport_has_conns(sk, false))
return result;
}
badness = score;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index cb24850d2c7f..971a0fdf1fbc 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -31,6 +31,7 @@
#include <net/ip6_route.h>
#include <net/tcp_states.h>
#include <net/dsfield.h>
+#include <net/sock_reuseport.h>
#include <linux/errqueue.h>
#include <linux/uaccess.h>
@@ -258,6 +259,7 @@ ipv4_connected:
goto out;
}
+ reuseport_has_conns(sk, true);
sk->sk_state = TCP_ESTABLISHED;
sk_set_txhash(sk);
out:
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 164f1d01273c..3a27c04ff62f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -177,13 +177,14 @@ static struct sock *udp6_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif, exact_dif);
if (score > badness) {
- if (sk->sk_reuseport) {
+ if (sk->sk_reuseport &&
+ sk->sk_state != TCP_ESTABLISHED) {
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
- if (result)
+ if (result && !reuseport_has_conns(sk, false))
return result;
}
result = sk;