From 5df6531292b5021ac9e4ed261eb7d1fa9ff3bf08 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Fri, 17 Jul 2020 12:35:26 +0200 Subject: inet6: Extract helper for selecting socket from reuseport group Prepare for calling into reuseport from inet6_lookup_listener as well. Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200717103536.397595-6-jakub@cloudflare.com --- net/ipv6/inet6_hashtables.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index fbe9d4295eac..03942eef8ab6 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -111,6 +111,23 @@ static inline int compute_score(struct sock *sk, struct net *net, return score; } +static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk, + struct sk_buff *skb, int doff, + const struct in6_addr *saddr, + __be16 sport, + const struct in6_addr *daddr, + unsigned short hnum) +{ + struct sock *reuse_sk = NULL; + u32 phash; + + if (sk->sk_reuseport) { + phash = inet6_ehashfn(net, daddr, hnum, saddr, sport); + reuse_sk = reuseport_select_sock(sk, phash, skb, doff); + } + return reuse_sk; +} + /* called with rcu_read_lock() */ static struct sock *inet6_lhash2_lookup(struct net *net, struct inet_listen_hashbucket *ilb2, @@ -123,21 +140,17 @@ static struct sock *inet6_lhash2_lookup(struct net *net, struct inet_connection_sock *icsk; struct sock *sk, *result = NULL; int score, hiscore = 0; - u32 phash = 0; inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) { sk = (struct sock *)icsk; score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif); if (score > hiscore) { - if (sk->sk_reuseport) { - phash = inet6_ehashfn(net, daddr, hnum, - saddr, sport); - result = reuseport_select_sock(sk, phash, - skb, doff); - if (result) - return result; - } + result = lookup_reuseport(net, sk, skb, doff, + saddr, sport, daddr, hnum); + if (result) + return result; + result = sk; hiscore = score; } -- cgit v1.2.3 From 1122702f02678597c4f1c7d316365ef502aafe08 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Fri, 17 Jul 2020 12:35:27 +0200 Subject: inet6: Run SK_LOOKUP BPF program on socket lookup Following ipv4 stack changes, run a BPF program attached to netns before looking up a listening socket. Program can return a listening socket to use as result of socket lookup, fail the lookup, or take no action. Suggested-by: Marek Majkowski Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200717103536.397595-7-jakub@cloudflare.com --- net/ipv6/inet6_hashtables.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 03942eef8ab6..2d3add9e6116 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -21,6 +21,8 @@ #include #include +extern struct inet_hashinfo tcp_hashinfo; + u32 inet6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, const struct in6_addr *faddr, const __be16 fport) @@ -159,6 +161,31 @@ static struct sock *inet6_lhash2_lookup(struct net *net, return result; } +static inline struct sock *inet6_lookup_run_bpf(struct net *net, + struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, + const struct in6_addr *saddr, + const __be16 sport, + const struct in6_addr *daddr, + const u16 hnum) +{ + struct sock *sk, *reuse_sk; + bool no_reuseport; + + if (hashinfo != &tcp_hashinfo) + return NULL; /* only TCP is supported */ + + no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, + saddr, sport, daddr, hnum, &sk); + if (no_reuseport || IS_ERR_OR_NULL(sk)) + return sk; + + reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum); + if (reuse_sk) + sk = reuse_sk; + return sk; +} + struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, @@ -170,6 +197,14 @@ struct sock *inet6_lookup_listener(struct net *net, struct sock *result = NULL; unsigned int hash2; + /* Lookup redirect from BPF */ + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { + result = inet6_lookup_run_bpf(net, hashinfo, skb, doff, + saddr, sport, daddr, hnum); + if (result) + goto done; + } + hash2 = ipv6_portaddr_hash(net, daddr, hnum); ilb2 = inet_lhash2_bucket(hashinfo, hash2); -- cgit v1.2.3 From 2a08748cd384cdfb8e1222bd3645a8d1d36e6a5d Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Fri, 17 Jul 2020 12:35:30 +0200 Subject: udp6: Extract helper for selecting socket from reuseport group Prepare for calling into reuseport from __udp6_lib_lookup as well. Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200717103536.397595-10-jakub@cloudflare.com --- net/ipv6/udp.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 38c0d9350c6b..084205c18a33 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -141,6 +141,27 @@ static int compute_score(struct sock *sk, struct net *net, return score; } +static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk, + struct sk_buff *skb, + const struct in6_addr *saddr, + __be16 sport, + const struct in6_addr *daddr, + unsigned int hnum) +{ + struct sock *reuse_sk = NULL; + u32 hash; + + if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) { + hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); + reuse_sk = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); + /* Fall back to scoring if group has connections */ + if (reuseport_has_conns(sk, false)) + return NULL; + } + return reuse_sk; +} + /* called with rcu_read_lock() */ static struct sock *udp6_lib_lookup2(struct net *net, const struct in6_addr *saddr, __be16 sport, @@ -150,7 +171,6 @@ static struct sock *udp6_lib_lookup2(struct net *net, { struct sock *sk, *result; int score, badness; - u32 hash = 0; result = NULL; badness = -1; @@ -158,16 +178,11 @@ static struct sock *udp6_lib_lookup2(struct net *net, score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif); if (score > badness) { - if (sk->sk_reuseport && - sk->sk_state != TCP_ESTABLISHED) { - hash = udp6_ehashfn(net, daddr, hnum, - saddr, sport); - - result = reuseport_select_sock(sk, hash, skb, - sizeof(struct udphdr)); - if (result && !reuseport_has_conns(sk, false)) - return result; - } + result = lookup_reuseport(net, sk, skb, + saddr, sport, daddr, hnum); + if (result) + return result; + result = sk; badness = score; } -- cgit v1.2.3 From 6d4201b1386b335bb53628a04ad63e547872dc5a Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Fri, 17 Jul 2020 12:35:31 +0200 Subject: udp6: Run SK_LOOKUP BPF program on socket lookup Same as for udp4, let BPF program override the socket lookup result, by selecting a receiving socket of its choice or failing the lookup, if no connected UDP socket matched packet 4-tuple. Suggested-by: Marek Majkowski Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200717103536.397595-11-jakub@cloudflare.com --- net/ipv6/udp.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 51 insertions(+), 9 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 084205c18a33..ff8be202726a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -190,6 +190,31 @@ static struct sock *udp6_lib_lookup2(struct net *net, return result; } +static inline struct sock *udp6_lookup_run_bpf(struct net *net, + struct udp_table *udptable, + struct sk_buff *skb, + const struct in6_addr *saddr, + __be16 sport, + const struct in6_addr *daddr, + u16 hnum) +{ + struct sock *sk, *reuse_sk; + bool no_reuseport; + + if (udptable != &udp_table) + return NULL; /* only UDP is supported */ + + no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP, + saddr, sport, daddr, hnum, &sk); + if (no_reuseport || IS_ERR_OR_NULL(sk)) + return sk; + + reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); + if (reuse_sk) + sk = reuse_sk; + return sk; +} + /* rcu_read_lock() must be held */ struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, @@ -200,25 +225,42 @@ struct sock *__udp6_lib_lookup(struct net *net, unsigned short hnum = ntohs(dport); unsigned int hash2, slot2; struct udp_hslot *hslot2; - struct sock *result; + struct sock *result, *sk; hash2 = ipv6_portaddr_hash(net, daddr, hnum); slot2 = hash2 & udptable->mask; hslot2 = &udptable->hash2[slot2]; + /* Lookup connected or non-wildcard sockets */ result = udp6_lib_lookup2(net, saddr, sport, daddr, hnum, dif, sdif, hslot2, skb); - if (!result) { - hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); - slot2 = hash2 & udptable->mask; + if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED) + goto done; + + /* Lookup redirect from BPF */ + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { + sk = udp6_lookup_run_bpf(net, udptable, skb, + saddr, sport, daddr, hnum); + if (sk) { + result = sk; + goto done; + } + } - hslot2 = &udptable->hash2[slot2]; + /* Got non-wildcard socket or error on first lookup */ + if (result) + goto done; - result = udp6_lib_lookup2(net, saddr, sport, - &in6addr_any, hnum, dif, sdif, - hslot2, skb); - } + /* Lookup wildcard sockets */ + hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); + slot2 = hash2 & udptable->mask; + hslot2 = &udptable->hash2[slot2]; + + result = udp6_lib_lookup2(net, saddr, sport, + &in6addr_any, hnum, dif, sdif, + hslot2, skb); +done: if (IS_ERR(result)) return NULL; return result; -- cgit v1.2.3 From 951cf368bcb11d6f817709660cf5cd914072c36f Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 20 Jul 2020 09:34:03 -0700 Subject: bpf: net: Use precomputed btf_id for bpf iterators One additional field btf_id is added to struct bpf_ctx_arg_aux to store the precomputed btf_ids. The btf_id is computed at build time with BTF_ID_LIST or BTF_ID_LIST_GLOBAL macro definitions. All existing bpf iterators are changed to used pre-compute btf_ids. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200720163403.1393551-1-yhs@fb.com --- net/ipv6/route.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 427b81cbc164..33f5efbad0a9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -61,6 +61,7 @@ #include #include #include +#include #ifdef CONFIG_SYSCTL #include @@ -6423,7 +6424,10 @@ void __init ip6_route_init_special_entries(void) #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt) -static const struct bpf_iter_reg ipv6_route_reg_info = { +BTF_ID_LIST(btf_fib6_info_id) +BTF_ID(struct, fib6_info) + +static struct bpf_iter_reg ipv6_route_reg_info = { .target = "ipv6_route", .seq_ops = &ipv6_route_seq_ops, .init_seq_private = bpf_iter_init_seq_net, @@ -6438,6 +6442,7 @@ static const struct bpf_iter_reg ipv6_route_reg_info = { static int __init bpf_iter_register(void) { + ipv6_route_reg_info.ctx_arg_info[0].btf_id = *btf_fib6_info_id; return bpf_iter_reg_target(&ipv6_route_reg_info); } -- cgit v1.2.3