From 283c16a2dfd332bf5610c874f7b9f9c8b601ce53 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 14 Dec 2018 11:51:57 +0100 Subject: indirect call wrappers: helpers to speed-up indirect calls of builtin This header define a bunch of helpers that allow avoiding the retpoline overhead when calling builtin functions via function pointers. It boils down to explicitly comparing the function pointers to known builtin functions and eventually invoke directly the latter. The macros defined here implement the boilerplate for the above schema and will be used by the next patches. rfc -> v1: - use branch prediction hint, as suggested by Eric v1 -> v2: - list explicitly the builtin function names in INDIRECT_CALL_*(), as suggested by Ed Cree Suggested-by: Eric Dumazet Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/indirect_call_wrapper.h | 51 +++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 include/linux/indirect_call_wrapper.h diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h new file mode 100644 index 000000000000..7c8b7f4948af --- /dev/null +++ b/include/linux/indirect_call_wrapper.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_INDIRECT_CALL_WRAPPER_H +#define _LINUX_INDIRECT_CALL_WRAPPER_H + +#ifdef CONFIG_RETPOLINE + +/* + * INDIRECT_CALL_$NR - wrapper for indirect calls with $NR known builtin + * @f: function pointer + * @f$NR: builtin functions names, up to $NR of them + * @__VA_ARGS__: arguments for @f + * + * Avoid retpoline overhead for known builtin, checking @f vs each of them and + * eventually invoking directly the builtin function. The functions are check + * in the given order. Fallback to the indirect call. + */ +#define INDIRECT_CALL_1(f, f1, ...) \ + ({ \ + likely(f == f1) ? f1(__VA_ARGS__) : f(__VA_ARGS__); \ + }) +#define INDIRECT_CALL_2(f, f2, f1, ...) \ + ({ \ + likely(f == f2) ? f2(__VA_ARGS__) : \ + INDIRECT_CALL_1(f, f1, __VA_ARGS__); \ + }) + +#define INDIRECT_CALLABLE_DECLARE(f) f +#define INDIRECT_CALLABLE_SCOPE + +#else +#define INDIRECT_CALL_1(f, name, ...) f(__VA_ARGS__) +#define INDIRECT_CALL_2(f, name, ...) f(__VA_ARGS__) +#define INDIRECT_CALLABLE_DECLARE(f) +#define INDIRECT_CALLABLE_SCOPE static +#endif + +/* + * We can use INDIRECT_CALL_$NR for ipv6 related functions only if ipv6 is + * builtin, this macro simplify dealing with indirect calls with only ipv4/ipv6 + * alternatives + */ +#if IS_BUILTIN(CONFIG_IPV6) +#define INDIRECT_CALL_INET(f, f2, f1, ...) \ + INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__) +#elif IS_ENABLED(CONFIG_INET) +#define INDIRECT_CALL_INET(f, f2, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__) +#else +#define INDIRECT_CALL_INET(f, f2, f1, ...) f(__VA_ARGS__) +#endif + +#endif -- cgit v1.2.3 From aaa5d90b395a72faff797b00d815165ee0e664c0 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 14 Dec 2018 11:51:58 +0100 Subject: net: use indirect call wrappers at GRO network layer This avoids an indirect calls for L3 GRO receive path, both for ipv4 and ipv6, if the latter is not compiled as a module. Note that when IPv6 is compiled as builtin, it will be checked first, so we have a single additional compare for the more common path. v1 -> v2: - adapted to INDIRECT_CALL_ changes Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/inet_common.h | 2 ++ net/core/dev.c | 15 +++++++++++++-- net/ipv6/ip6_offload.c | 6 +++--- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 3ca969cbd161..56e7592811ea 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -2,6 +2,8 @@ #ifndef _INET_COMMON_H #define _INET_COMMON_H +#include + extern const struct proto_ops inet_stream_ops; extern const struct proto_ops inet_dgram_ops; diff --git a/net/core/dev.c b/net/core/dev.c index ed9aa4a91f1f..1b5a4410be0e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -145,6 +145,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -5338,6 +5339,8 @@ static void flush_all_backlogs(void) put_online_cpus(); } +INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int)); +INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int)); static int napi_gro_complete(struct sk_buff *skb) { struct packet_offload *ptype; @@ -5357,7 +5360,9 @@ static int napi_gro_complete(struct sk_buff *skb) if (ptype->type != type || !ptype->callbacks.gro_complete) continue; - err = ptype->callbacks.gro_complete(skb, 0); + err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete, + ipv6_gro_complete, inet_gro_complete, + skb, 0); break; } rcu_read_unlock(); @@ -5504,6 +5509,10 @@ static void gro_flush_oldest(struct list_head *head) napi_gro_complete(oldest); } +INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *, + struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *, + struct sk_buff *)); static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1); @@ -5553,7 +5562,9 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->csum_valid = 0; } - pp = ptype->callbacks.gro_receive(gro_head, skb); + pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive, + ipv6_gro_receive, inet_gro_receive, + gro_head, skb); break; } rcu_read_unlock(); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 70f525c33cb6..ff8b484d2258 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -164,8 +164,8 @@ static int ipv6_exthdrs_len(struct ipv6hdr *iph, return len; } -static struct sk_buff *ipv6_gro_receive(struct list_head *head, - struct sk_buff *skb) +INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head, + struct sk_buff *skb) { const struct net_offload *ops; struct sk_buff *pp = NULL; @@ -301,7 +301,7 @@ static struct sk_buff *ip4ip6_gro_receive(struct list_head *head, return inet_gro_receive(head, skb); } -static int ipv6_gro_complete(struct sk_buff *skb, int nhoff) +INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff) { const struct net_offload *ops; struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff); -- cgit v1.2.3 From 028e0a4766844e7eeb31b93479ea6dd40cfc2895 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 14 Dec 2018 11:51:59 +0100 Subject: net: use indirect call wrappers at GRO transport layer This avoids an indirect call in the receive path for TCP and UDP packets. TCP takes precedence on UDP, so that we have a single additional conditional in the common case. When IPV6 is build as module, all gro symbols except UDPv6 are builtin, while the latter belong to the ipv6 module, so we need some special care. v1 -> v2: - adapted to INDIRECT_CALL_ changes v2 -> v3: - fix build issue with CONFIG_IPV6=m Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/inet_common.h | 7 +++++++ net/ipv4/af_inet.c | 13 +++++++++++-- net/ipv4/tcp_offload.c | 6 ++++-- net/ipv4/udp_offload.c | 7 ++++--- net/ipv6/ip6_offload.c | 29 +++++++++++++++++++++++++++-- net/ipv6/tcpv6_offload.c | 7 ++++--- net/ipv6/udp_offload.c | 7 ++++--- 7 files changed, 61 insertions(+), 15 deletions(-) diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 56e7592811ea..975901a95c0f 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -56,4 +56,11 @@ static inline void inet_ctl_sock_destroy(struct sock *sk) sock_release(sk->sk_socket); } +#define indirect_call_gro_receive(f2, f1, cb, head, skb) \ +({ \ + unlikely(gro_recursion_inc_test(skb)) ? \ + NAPI_GRO_CB(skb)->flush |= 1, NULL : \ + INDIRECT_CALL_2(cb, f2, f1, head, skb); \ +}) + #endif diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 326c422c22f8..0dfb72c46671 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1385,6 +1385,10 @@ out: } EXPORT_SYMBOL(inet_gso_segment); +INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *, + struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, + struct sk_buff *)); struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb) { const struct net_offload *ops; @@ -1494,7 +1498,8 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb) skb_gro_pull(skb, sizeof(*iph)); skb_set_transport_header(skb, skb_gro_offset(skb)); - pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); + pp = indirect_call_gro_receive(tcp4_gro_receive, udp4_gro_receive, + ops->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); @@ -1556,6 +1561,8 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) return -EINVAL; } +INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *, int)); +INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); int inet_gro_complete(struct sk_buff *skb, int nhoff) { __be16 newlen = htons(skb->len - nhoff); @@ -1581,7 +1588,9 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff) * because any hdr with option will have been flushed in * inet_gro_receive(). */ - err = ops->callbacks.gro_complete(skb, nhoff + sizeof(*iph)); + err = INDIRECT_CALL_2(ops->callbacks.gro_complete, + tcp4_gro_complete, udp4_gro_complete, + skb, nhoff + sizeof(*iph)); out_unlock: rcu_read_unlock(); diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 870b0a335061..0fbf7d4df9da 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -10,6 +10,7 @@ * TCPv4 GSO/GRO support */ +#include #include #include #include @@ -305,7 +306,8 @@ int tcp_gro_complete(struct sk_buff *skb) } EXPORT_SYMBOL(tcp_gro_complete); -static struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb) +INDIRECT_CALLABLE_SCOPE +struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb) { /* Don't bother verifying checksum if we're going to flush anyway. */ if (!NAPI_GRO_CB(skb)->flush && @@ -318,7 +320,7 @@ static struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff * return tcp_gro_receive(head, skb); } -static int tcp4_gro_complete(struct sk_buff *skb, int thoff) +INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff) { const struct iphdr *iph = ip_hdr(skb); struct tcphdr *th = tcp_hdr(skb); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 0646d61f4fa8..9a141a6cf1a0 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -13,6 +13,7 @@ #include #include #include +#include static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features, @@ -451,8 +452,8 @@ out_unlock: } EXPORT_SYMBOL(udp_gro_receive); -static struct sk_buff *udp4_gro_receive(struct list_head *head, - struct sk_buff *skb) +INDIRECT_CALLABLE_SCOPE +struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb) { struct udphdr *uh = udp_gro_udphdr(skb); @@ -525,7 +526,7 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff, } EXPORT_SYMBOL(udp_gro_complete); -static int udp4_gro_complete(struct sk_buff *skb, int nhoff) +INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff) { const struct iphdr *iph = ip_hdr(skb); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index ff8b484d2258..5c045691c302 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -20,6 +20,23 @@ #include "ip6_offload.h" +/* All GRO functions are always builtin, except UDP over ipv6, which lays in + * ipv6 module, as it depends on UDPv6 lookup function, so we need special care + * when ipv6 is built as a module + */ +#if IS_BUILTIN(CONFIG_IPV6) +#define INDIRECT_CALL_L4(f, f2, f1, ...) INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__) +#else +#define INDIRECT_CALL_L4(f, f2, f1, ...) INDIRECT_CALL_1(f, f2, __VA_ARGS__) +#endif + +#define indirect_call_gro_receive_l4(f2, f1, cb, head, skb) \ +({ \ + unlikely(gro_recursion_inc_test(skb)) ? \ + NAPI_GRO_CB(skb)->flush |= 1, NULL : \ + INDIRECT_CALL_L4(cb, f2, f1, head, skb); \ +}) + static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto) { const struct net_offload *ops = NULL; @@ -164,6 +181,10 @@ static int ipv6_exthdrs_len(struct ipv6hdr *iph, return len; } +INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *, + struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *, + struct sk_buff *)); INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head, struct sk_buff *skb) { @@ -260,7 +281,8 @@ not_same_flow: skb_gro_postpull_rcsum(skb, iph, nlen); - pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); + pp = indirect_call_gro_receive_l4(tcp6_gro_receive, udp6_gro_receive, + ops->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); @@ -301,6 +323,8 @@ static struct sk_buff *ip4ip6_gro_receive(struct list_head *head, return inet_gro_receive(head, skb); } +INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *, int)); +INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff) { const struct net_offload *ops; @@ -320,7 +344,8 @@ INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff) if (WARN_ON(!ops || !ops->callbacks.gro_complete)) goto out_unlock; - err = ops->callbacks.gro_complete(skb, nhoff); + err = INDIRECT_CALL_L4(ops->callbacks.gro_complete, tcp6_gro_complete, + udp6_gro_complete, skb, nhoff); out_unlock: rcu_read_unlock(); diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index e72947c99454..3179c425d7ff 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -9,14 +9,15 @@ * * TCPv6 GSO/GRO support */ +#include #include #include #include #include #include "ip6_offload.h" -static struct sk_buff *tcp6_gro_receive(struct list_head *head, - struct sk_buff *skb) +INDIRECT_CALLABLE_SCOPE +struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb) { /* Don't bother verifying checksum if we're going to flush anyway. */ if (!NAPI_GRO_CB(skb)->flush && @@ -29,7 +30,7 @@ static struct sk_buff *tcp6_gro_receive(struct list_head *head, return tcp_gro_receive(head, skb); } -static int tcp6_gro_complete(struct sk_buff *skb, int thoff) +INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff) { const struct ipv6hdr *iph = ipv6_hdr(skb); struct tcphdr *th = tcp_hdr(skb); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 828b2457f97b..83b11d0ac091 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include #include @@ -114,8 +115,8 @@ out: return segs; } -static struct sk_buff *udp6_gro_receive(struct list_head *head, - struct sk_buff *skb) +INDIRECT_CALLABLE_SCOPE +struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb) { struct udphdr *uh = udp_gro_udphdr(skb); @@ -142,7 +143,7 @@ flush: return NULL; } -static int udp6_gro_complete(struct sk_buff *skb, int nhoff) +INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff) { const struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); -- cgit v1.2.3 From 4f24ed77dec9b067d08f7958a287cbf48665f35e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 14 Dec 2018 11:52:00 +0100 Subject: udp: use indirect call wrappers for GRO socket lookup This avoids another indirect call for UDP GRO. Again, the test for the IPv6 variant is performed first. v1 -> v2: - adapted to INDIRECT_CALL_ changes Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv4/udp_offload.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 9a141a6cf1a0..64f9715173ac 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -392,6 +392,8 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, return NULL; } +INDIRECT_CALLABLE_DECLARE(struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, + __be16 sport, __be16 dport)); struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, struct udphdr *uh, udp_lookup_t lookup) { @@ -403,7 +405,8 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, struct sock *sk; rcu_read_lock(); - sk = (*lookup)(skb, uh->source, uh->dest); + sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb, + udp4_lib_lookup_skb, skb, uh->source, uh->dest); if (!sk) goto out_unlock; @@ -503,7 +506,8 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff, uh->len = newlen; rcu_read_lock(); - sk = (*lookup)(skb, uh->source, uh->dest); + sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb, + udp4_lib_lookup_skb, skb, uh->source, uh->dest); if (sk && udp_sk(sk)->gro_enabled) { err = udp_gro_complete_segment(skb); } else if (sk && udp_sk(sk)->gro_complete) { -- cgit v1.2.3