Diffstat (limited to 'target/linux/generic/pending-6.1/680-net-add-TCP-fraglist-GRO-support.patch')
-rw-r--r-- | target/linux/generic/pending-6.1/680-net-add-TCP-fraglist-GRO-support.patch | 627
1 file changed, 627 insertions, 0 deletions
diff --git a/target/linux/generic/pending-6.1/680-net-add-TCP-fraglist-GRO-support.patch b/target/linux/generic/pending-6.1/680-net-add-TCP-fraglist-GRO-support.patch
new file mode 100644
index 0000000000..f52233fe90
--- /dev/null
+++ b/target/linux/generic/pending-6.1/680-net-add-TCP-fraglist-GRO-support.patch
@@ -0,0 +1,627 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Tue, 23 Apr 2024 11:23:03 +0200
+Subject: [PATCH] net: add TCP fraglist GRO support
+
+When forwarding TCP after GRO, software segmentation is very expensive,
+especially when the checksum needs to be recalculated.
+One case where that's currently unavoidable is when routing packets over
+PPPoE. Performance improves significantly when using fraglist GRO
+implemented in the same way as for UDP.
+
+Here's a measurement of running 2 TCP streams through a MediaTek MT7622
+device (2-core Cortex-A53), which runs NAT with flow offload enabled from
+one ethernet port to PPPoE on another ethernet port + cake qdisc set to
+1Gbps.
+
+rx-gro-list off: 630 Mbit/s, CPU 35% idle
+rx-gro-list on: 770 Mbit/s, CPU 40% idle
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/include/net/gro.h
++++ b/include/net/gro.h
+@@ -424,6 +424,7 @@ static inline __wsum ip6_gro_compute_pse
+ }
+ 
+ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);
++int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb);
+ 
+ /* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
+ static inline void gro_normal_list(struct napi_struct *napi)
+@@ -446,5 +447,48 @@ static inline void gro_normal_one(struct
+ 	gro_normal_list(napi);
+ }
+ 
++/* This function is the alternative of 'inet_iif' and 'inet_sdif'
++ * functions in case we can not rely on fields of IPCB.
++ *
++ * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized.
++ * The caller must hold the RCU read lock.
++ */
++static inline void inet_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
++{
++	*iif = inet_iif(skb) ?: skb->dev->ifindex;
++	*sdif = 0;
++
++#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
++	if (netif_is_l3_slave(skb->dev)) {
++		struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);
++
++		*sdif = *iif;
++		*iif = master ? master->ifindex : 0;
++	}
++#endif
++}
++
++/* This function is the alternative of 'inet6_iif' and 'inet6_sdif'
++ * functions in case we can not rely on fields of IP6CB.
++ *
++ * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized.
++ * The caller must hold the RCU read lock.
++ */
++static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
++{
++	/* using skb->dev->ifindex because skb_dst(skb) is not initialized */
++	*iif = skb->dev->ifindex;
++	*sdif = 0;
++
++#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
++	if (netif_is_l3_slave(skb->dev)) {
++		struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);
++
++		*sdif = *iif;
++		*iif = master ? master->ifindex : 0;
++	}
++#endif
++}
++
+ 
+ #endif /* _NET_IPV6_GRO_H */
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -2057,7 +2057,10 @@ void tcp_v4_destroy_sock(struct sock *sk
+ 
+ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
+ 				netdev_features_t features);
+-struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb);
++struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb);
++struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th);
++struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
++				struct tcphdr *th);
+ INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff));
+ INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb));
+ INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff));
+--- a/net/core/gro.c
++++ b/net/core/gro.c
+@@ -290,6 +290,33 @@ done:
+ 	return 0;
+ }
+ 
++int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
++{
++	if (unlikely(p->len + skb->len >= 65536))
++		return -E2BIG;
++
++	if (NAPI_GRO_CB(p)->last == p)
++		skb_shinfo(p)->frag_list = skb;
++	else
++		NAPI_GRO_CB(p)->last->next = skb;
++
++	skb_pull(skb, skb_gro_offset(skb));
++
++	NAPI_GRO_CB(p)->last = skb;
++	NAPI_GRO_CB(p)->count++;
++	p->data_len += skb->len;
++
++	/* sk ownership - if any - completely transferred to the aggregated packet */
++	skb->destructor = NULL;
++	skb->sk = NULL;
++	p->truesize += skb->truesize;
++	p->len += skb->len;
++
++	NAPI_GRO_CB(skb)->same_flow = 1;
++
++	return 0;
++}
++
+ 
+ static void napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
+ {
+--- a/net/ipv4/tcp_offload.c
++++ b/net/ipv4/tcp_offload.c
+@@ -27,6 +27,70 @@ static void tcp_gso_tstamp(struct sk_buf
+ 	}
+ }
+ 
++static void __tcpv4_gso_segment_csum(struct sk_buff *seg,
++				     __be32 *oldip, __be32 newip,
++				     __be16 *oldport, __be16 newport)
++{
++	struct tcphdr *th;
++	struct iphdr *iph;
++
++	if (*oldip == newip && *oldport == newport)
++		return;
++
++	th = tcp_hdr(seg);
++	iph = ip_hdr(seg);
++
++	inet_proto_csum_replace4(&th->check, seg, *oldip, newip, true);
++	inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false);
++	*oldport = newport;
++
++	csum_replace4(&iph->check, *oldip, newip);
++	*oldip = newip;
++}
++
++static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs)
++{
++	const struct tcphdr *th;
++	const struct iphdr *iph;
++	struct sk_buff *seg;
++	struct tcphdr *th2;
++	struct iphdr *iph2;
++
++	seg = segs;
++	th = tcp_hdr(seg);
++	iph = ip_hdr(seg);
++	th2 = tcp_hdr(seg->next);
++	iph2 = ip_hdr(seg->next);
++
++	if (!(*(const u32 *)&th->source ^ *(const u32 *)&th2->source) &&
++	    iph->daddr == iph2->daddr && iph->saddr == iph2->saddr)
++		return segs;
++
++	while ((seg = seg->next)) {
++		th2 = tcp_hdr(seg);
++		iph2 = ip_hdr(seg);
++
++		__tcpv4_gso_segment_csum(seg,
++					 &iph2->saddr, iph->saddr,
++					 &th2->source, th->source);
++		__tcpv4_gso_segment_csum(seg,
++					 &iph2->daddr, iph->daddr,
++					 &th2->dest, th->dest);
++	}
++
++	return segs;
++}
++
++static struct sk_buff *__tcp4_gso_segment_list(struct sk_buff *skb,
++					       netdev_features_t features)
++{
++	skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
++	if (IS_ERR(skb))
++		return skb;
++
++	return __tcpv4_gso_segment_list_csum(skb);
++}
++
+ static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
+ 					netdev_features_t features)
+ {
+@@ -36,6 +100,9 @@ static struct sk_buff *tcp4_gso_segment(
+ 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
+ 		return ERR_PTR(-EINVAL);
+ 
++	if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
++		return __tcp4_gso_segment_list(skb, features);
++
+ 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+ 		const struct iphdr *iph = ip_hdr(skb);
+ 		struct tcphdr *th = tcp_hdr(skb);
+@@ -177,61 +244,76 @@ out:
+ 	return segs;
+ }
+ 
+-struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
++struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th)
+ {
+-	struct sk_buff *pp = NULL;
++	struct tcphdr *th2;
+ 	struct sk_buff *p;
++
++	list_for_each_entry(p, head, list) {
++		if (!NAPI_GRO_CB(p)->same_flow)
++			continue;
++
++		th2 = tcp_hdr(p);
++		if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
++			NAPI_GRO_CB(p)->same_flow = 0;
++			continue;
++		}
++
++		return p;
++	}
++
++	return NULL;
++}
++
++struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb)
++{
++	unsigned int thlen, hlen, off;
+ 	struct tcphdr *th;
+-	struct tcphdr *th2;
+-	unsigned int len;
+-	unsigned int thlen;
+-	__be32 flags;
+-	unsigned int mss = 1;
+-	unsigned int hlen;
+-	unsigned int off;
+-	int flush = 1;
+-	int i;
+ 
+ 	off = skb_gro_offset(skb);
+ 	hlen = off + sizeof(*th);
+ 	th = skb_gro_header(skb, hlen, off);
+ 	if (unlikely(!th))
+-		goto out;
++		return NULL;
+ 
+ 	thlen = th->doff * 4;
+ 	if (thlen < sizeof(*th))
+-		goto out;
++		return NULL;
+ 
+ 	hlen = off + thlen;
+ 	if (skb_gro_header_hard(skb, hlen)) {
+ 		th = skb_gro_header_slow(skb, hlen, off);
+ 		if (unlikely(!th))
+-			goto out;
++			return NULL;
+ 	}
+ 
+ 	skb_gro_pull(skb, thlen);
+ 
+-	len = skb_gro_len(skb);
+-	flags = tcp_flag_word(th);
+-
+-	list_for_each_entry(p, head, list) {
+-		if (!NAPI_GRO_CB(p)->same_flow)
+-			continue;
++	return th;
++}
+ 
+-		th2 = tcp_hdr(p);
++struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
++				struct tcphdr *th)
++{
++	unsigned int thlen = th->doff * 4;
++	struct sk_buff *pp = NULL;
++	struct sk_buff *p;
++	struct tcphdr *th2;
++	unsigned int len;
++	__be32 flags;
++	unsigned int mss = 1;
++	int flush = 1;
++	int i;
+ 
+-		if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
+-			NAPI_GRO_CB(p)->same_flow = 0;
+-			continue;
+-		}
++	len = skb_gro_len(skb);
++	flags = tcp_flag_word(th);
+ 
+-		goto found;
+-	}
+-	p = NULL;
+-	goto out_check_final;
++	p = tcp_gro_lookup(head, th);
++	if (!p)
++		goto out_check_final;
+ 
+-found:
+ 	/* Include the IP ID check below from the inner most IP hdr */
++	th2 = tcp_hdr(p);
+ 	flush = NAPI_GRO_CB(p)->flush;
+ 	flush |= (__force int)(flags & TCP_FLAG_CWR);
+ 	flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
+@@ -268,6 +350,19 @@ found:
+ 	flush |= p->decrypted ^ skb->decrypted;
+ #endif
+ 
++	if (unlikely(NAPI_GRO_CB(p)->is_flist)) {
++		flush |= (__force int)(flags ^ tcp_flag_word(th2));
++		flush |= skb->ip_summed != p->ip_summed;
++		flush |= skb->csum_level != p->csum_level;
++		flush |= !pskb_may_pull(skb, skb_gro_offset(skb));
++		flush |= NAPI_GRO_CB(p)->count >= 64;
++
++		if (flush || skb_gro_receive_list(p, skb))
++			mss = 1;
++
++		goto out_check_final;
++	}
++
+ 	if (flush || skb_gro_receive(p, skb)) {
+ 		mss = 1;
+ 		goto out_check_final;
+@@ -289,7 +384,6 @@ out_check_final:
+ 	if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
+ 		pp = p;
+ 
+-out:
+ 	NAPI_GRO_CB(skb)->flush |= (flush != 0);
+ 
+ 	return pp;
+@@ -315,18 +409,58 @@ int tcp_gro_complete(struct sk_buff *skb
+ }
+ EXPORT_SYMBOL(tcp_gro_complete);
+ 
++static void tcp4_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
++				    struct tcphdr *th)
++{
++	const struct iphdr *iph;
++	struct sk_buff *p;
++	struct sock *sk;
++	struct net *net;
++	int iif, sdif;
++
++	if (!(skb->dev->features & NETIF_F_GRO_FRAGLIST))
++		return;
++
++	p = tcp_gro_lookup(head, th);
++	if (p) {
++		NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist;
++		return;
++	}
++
++	inet_get_iif_sdif(skb, &iif, &sdif);
++	iph = skb_gro_network_header(skb);
++	net = dev_net(skb->dev);
++	sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
++				       iph->saddr, th->source,
++				       iph->daddr, ntohs(th->dest),
++				       iif, sdif);
++	NAPI_GRO_CB(skb)->is_flist = !sk;
++	if (sk)
++		sock_put(sk);
++}
++
+ INDIRECT_CALLABLE_SCOPE
+ struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
+ {
++	struct tcphdr *th;
++
+ 	/* Don't bother verifying checksum if we're going to flush anyway. */
+ 	if (!NAPI_GRO_CB(skb)->flush &&
+ 	    skb_gro_checksum_validate(skb, IPPROTO_TCP,
+-				      inet_gro_compute_pseudo)) {
+-		NAPI_GRO_CB(skb)->flush = 1;
+-		return NULL;
+-	}
++				      inet_gro_compute_pseudo))
++		goto flush;
++
++	th = tcp_gro_pull_header(skb);
++	if (!th)
++		goto flush;
+ 
+-	return tcp_gro_receive(head, skb);
++	tcp4_check_fraglist_gro(head, skb, th);
++
++	return tcp_gro_receive(head, skb, th);
++
++flush:
++	NAPI_GRO_CB(skb)->flush = 1;
++	return NULL;
+ }
+ 
+ INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
+@@ -334,6 +468,15 @@ INDIRECT_CALLABLE_SCOPE int tcp4_gro_com
+ 	const struct iphdr *iph = ip_hdr(skb);
+ 	struct tcphdr *th = tcp_hdr(skb);
+ 
++	if (unlikely(NAPI_GRO_CB(skb)->is_flist)) {
++		skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV4;
++		skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
++
++		__skb_incr_checksum_unnecessary(skb);
++
++		return 0;
++	}
++
+ 	th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
+ 				  iph->daddr, 0);
+ 	skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
+--- a/net/ipv4/udp_offload.c
++++ b/net/ipv4/udp_offload.c
+@@ -425,33 +425,6 @@ out:
+ 	return segs;
+ }
+ 
+-static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
+-{
+-	if (unlikely(p->len + skb->len >= 65536))
+-		return -E2BIG;
+-
+-	if (NAPI_GRO_CB(p)->last == p)
+-		skb_shinfo(p)->frag_list = skb;
+-	else
+-		NAPI_GRO_CB(p)->last->next = skb;
+-
+-	skb_pull(skb, skb_gro_offset(skb));
+-
+-	NAPI_GRO_CB(p)->last = skb;
+-	NAPI_GRO_CB(p)->count++;
+-	p->data_len += skb->len;
+-
+-	/* sk ownership - if any - completely transferred to the aggregated packet */
+-	skb->destructor = NULL;
+-	skb->sk = NULL;
+-	p->truesize += skb->truesize;
+-	p->len += skb->len;
+-
+-	NAPI_GRO_CB(skb)->same_flow = 1;
+-
+-	return 0;
+-}
+-
+ 
+ #define UDP_GRO_CNT_MAX 64
+ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
+--- a/net/ipv6/tcpv6_offload.c
++++ b/net/ipv6/tcpv6_offload.c
+@@ -7,24 +7,67 @@
+  */
+ #include <linux/indirect_call_wrapper.h>
+ #include <linux/skbuff.h>
++#include <net/inet6_hashtables.h>
+ #include <net/gro.h>
+ #include <net/protocol.h>
+ #include <net/tcp.h>
+ #include <net/ip6_checksum.h>
+ #include "ip6_offload.h"
+ 
++static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
++				    struct tcphdr *th)
++{
++#if IS_ENABLED(CONFIG_IPV6)
++	const struct ipv6hdr *hdr;
++	struct sk_buff *p;
++	struct sock *sk;
++	struct net *net;
++	int iif, sdif;
++
++	if (!(skb->dev->features & NETIF_F_GRO_FRAGLIST))
++		return;
++
++	p = tcp_gro_lookup(head, th);
++	if (p) {
++		NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist;
++		return;
++	}
++
++	inet6_get_iif_sdif(skb, &iif, &sdif);
++	hdr = skb_gro_network_header(skb);
++	net = dev_net(skb->dev);
++	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
++					&hdr->saddr, th->source,
++					&hdr->daddr, ntohs(th->dest),
++					iif, sdif);
++	NAPI_GRO_CB(skb)->is_flist = !sk;
++	if (sk)
++		sock_put(sk);
++#endif /* IS_ENABLED(CONFIG_IPV6) */
++}
++
+ INDIRECT_CALLABLE_SCOPE
+ struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)
+ {
++	struct tcphdr *th;
++
+ 	/* Don't bother verifying checksum if we're going to flush anyway. */
+ 	if (!NAPI_GRO_CB(skb)->flush &&
+ 	    skb_gro_checksum_validate(skb, IPPROTO_TCP,
+-				      ip6_gro_compute_pseudo)) {
+-		NAPI_GRO_CB(skb)->flush = 1;
+-		return NULL;
+-	}
++				      ip6_gro_compute_pseudo))
++		goto flush;
+ 
+-	return tcp_gro_receive(head, skb);
++	th = tcp_gro_pull_header(skb);
++	if (!th)
++		goto flush;
++
++	tcp6_check_fraglist_gro(head, skb, th);
++
++	return tcp_gro_receive(head, skb, th);
++
++flush:
++	NAPI_GRO_CB(skb)->flush = 1;
++	return NULL;
+ }
+ 
+ INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
+@@ -32,6 +75,15 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_com
+ 	const struct ipv6hdr *iph = ipv6_hdr(skb);
+ 	struct tcphdr *th = tcp_hdr(skb);
+ 
++	if (unlikely(NAPI_GRO_CB(skb)->is_flist)) {
++		skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV6;
++		skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
++
++		__skb_incr_checksum_unnecessary(skb);
++
++		return 0;
++	}
++
+ 	th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr,
+ 				  &iph->daddr, 0);
+ 	skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
+@@ -39,6 +91,61 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_com
+ 	return tcp_gro_complete(skb);
+ }
+ 
++static void __tcpv6_gso_segment_csum(struct sk_buff *seg,
++				     __be16 *oldport, __be16 newport)
++{
++	struct tcphdr *th;
++
++	if (*oldport == newport)
++		return;
++
++	th = tcp_hdr(seg);
++	inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false);
++	*oldport = newport;
++}
++
++static struct sk_buff *__tcpv6_gso_segment_list_csum(struct sk_buff *segs)
++{
++	const struct tcphdr *th;
++	const struct ipv6hdr *iph;
++	struct sk_buff *seg;
++	struct tcphdr *th2;
++	struct ipv6hdr *iph2;
++
++	seg = segs;
++	th = tcp_hdr(seg);
++	iph = ipv6_hdr(seg);
++	th2 = tcp_hdr(seg->next);
++	iph2 = ipv6_hdr(seg->next);
++
++	if (!(*(const u32 *)&th->source ^ *(const u32 *)&th2->source) &&
++	    ipv6_addr_equal(&iph->saddr, &iph2->saddr) &&
++	    ipv6_addr_equal(&iph->daddr, &iph2->daddr))
++		return segs;
++
++	while ((seg = seg->next)) {
++		th2 = tcp_hdr(seg);
++		iph2 = ipv6_hdr(seg);
++
++		iph2->saddr = iph->saddr;
++		iph2->daddr = iph->daddr;
++		__tcpv6_gso_segment_csum(seg, &th2->source, th->source);
++		__tcpv6_gso_segment_csum(seg, &th2->dest, th->dest);
++	}
++
++	return segs;
++}
++
++static struct sk_buff *__tcp6_gso_segment_list(struct sk_buff *skb,
++					       netdev_features_t features)
++{
++	skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
++	if (IS_ERR(skb))
++		return skb;
++
++	return __tcpv6_gso_segment_list_csum(skb);
++}
++
+ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
+ 					netdev_features_t features)
+ {
+@@ -50,6 +157,9 @@ static struct sk_buff *tcp6_gso_segment(
+ 	if (!pskb_may_pull(skb, sizeof(*th)))
+ 		return ERR_PTR(-EINVAL);
+ 
++	if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
++		return __tcp6_gso_segment_list(skb, features);
++
+ 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+ 		const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ 		struct tcphdr *th = tcp_hdr(skb);
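Note on the checksum fixups above: when a fraglist-GRO'd superpacket is re-segmented after NAT has rewritten the head segment's addresses and ports, __tcpv4_gso_segment_csum()/__tcpv6_gso_segment_csum() patch each trailing segment's checksum incrementally (RFC 1624) instead of recomputing it over the payload. The following is a minimal, self-contained userspace model of that arithmetic; it mirrors what the kernel's csum_replace4()/inet_proto_csum_replace*() helpers do, but all names and sample values are local to this sketch and are not part of the patch.

/* csum_model.c - ones'-complement incremental checksum update (RFC 1624).
 * Models the fixup applied when one 32-bit header field (e.g. the IPv4
 * source address) changes from 'from' to 'to' under NAT.
 */
#include <stdint.h>
#include <stdio.h>

/* Fold a 32-bit accumulator into 16 bits, wrapping carries around. */
static uint16_t csum_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

/* RFC 1624 eq. 3: HC' = ~(~HC + ~m + m'), with the 32-bit field split
 * into two 16-bit words for the ones'-complement sum. */
static uint16_t csum_update32(uint16_t check, uint32_t from, uint32_t to)
{
	uint32_t sum = (uint16_t)~check;

	sum += (uint16_t)~(from >> 16);
	sum += (uint16_t)~(from & 0xffff);
	sum += to >> 16;
	sum += to & 0xffff;

	return (uint16_t)~csum_fold(sum);
}

int main(void)
{
	/* Rewrite 192.0.2.1 -> 192.0.2.99 and fix up an example checksum. */
	uint16_t check = 0xb861;
	uint16_t fixed = csum_update32(check, 0xc0000201, 0xc0000263);

	printf("checksum 0x%04x -> 0x%04x\n", check, fixed);
	return 0;
}

This identity is why the commit message singles out checksum recalculation as the expensive part of software segmentation: with fraglist GRO the original per-segment checksums survive aggregation, so only these small header fixups are needed on the forwarding path. The feature itself is off by default and is toggled per device, e.g. `ethtool -K <dev> rx-gro-list on`.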