diff options
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/ip6_fib.c | 108 | ||||
-rw-r--r-- | net/ipv6/route.c | 93 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 124 | ||||
-rw-r--r-- | net/ipv6/udp.c | 3 | ||||
-rw-r--r-- | net/ipv6/udp_offload.c | 29 |
5 files changed, 295 insertions, 62 deletions
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index cfae0a1529a1..58fbde244381 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -370,6 +370,21 @@ static int call_fib6_entry_notifier(struct notifier_block *nb, return call_fib6_notifier(nb, event_type, &info.info); } +static int call_fib6_multipath_entry_notifier(struct notifier_block *nb, + enum fib_event_type event_type, + struct fib6_info *rt, + unsigned int nsiblings, + struct netlink_ext_ack *extack) +{ + struct fib6_entry_notifier_info info = { + .info.extack = extack, + .rt = rt, + .nsiblings = nsiblings, + }; + + return call_fib6_notifier(nb, event_type, &info.info); +} + int call_fib6_entry_notifiers(struct net *net, enum fib_event_type event_type, struct fib6_info *rt, @@ -400,6 +415,17 @@ int call_fib6_multipath_entry_notifiers(struct net *net, return call_fib6_notifiers(net, event_type, &info.info); } +int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt) +{ + struct fib6_entry_notifier_info info = { + .rt = rt, + .nsiblings = rt->fib6_nsiblings, + }; + + rt->fib6_table->fib_seq++; + return call_fib6_notifiers(net, FIB_EVENT_ENTRY_REPLACE, &info.info); +} + struct fib6_dump_arg { struct net *net; struct notifier_block *nb; @@ -408,22 +434,29 @@ struct fib6_dump_arg { static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg) { - if (rt == arg->net->ipv6.fib6_null_entry) + enum fib_event_type fib_event = FIB_EVENT_ENTRY_REPLACE; + int err; + + if (!rt || rt == arg->net->ipv6.fib6_null_entry) return 0; - return call_fib6_entry_notifier(arg->nb, FIB_EVENT_ENTRY_ADD, - rt, arg->extack); + + if (rt->fib6_nsiblings) + err = call_fib6_multipath_entry_notifier(arg->nb, fib_event, + rt, + rt->fib6_nsiblings, + arg->extack); + else + err = call_fib6_entry_notifier(arg->nb, fib_event, rt, + arg->extack); + + return err; } static int fib6_node_dump(struct fib6_walker *w) { - struct fib6_info *rt; - int err = 0; + int err; - for_each_fib6_walker_rt(w) { - err = fib6_rt_dump(rt, w->args); - if (err) - break; - } + err = fib6_rt_dump(w->leaf, w->args); w->leaf = NULL; return err; } @@ -1039,6 +1072,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, (info->nlh->nlmsg_flags & NLM_F_CREATE)); int found = 0; bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); + bool notify_sibling_rt = false; u16 nlflags = NLM_F_EXCL; int err; @@ -1130,6 +1164,7 @@ next_iter: /* Find the first route that have the same metric */ sibling = leaf; + notify_sibling_rt = true; while (sibling) { if (sibling->fib6_metric == rt->fib6_metric && rt6_qualify_for_ecmp(sibling)) { @@ -1139,6 +1174,7 @@ next_iter: } sibling = rcu_dereference_protected(sibling->fib6_next, lockdep_is_held(&rt->fib6_table->tb6_lock)); + notify_sibling_rt = false; } /* For each sibling in the list, increment the counter of * siblings. BUG() if counters does not match, list of siblings @@ -1165,10 +1201,21 @@ next_iter: add: nlflags |= NLM_F_CREATE; - if (!info->skip_notify_kernel) { + /* The route should only be notified if it is the first + * route in the node or if it is added as a sibling + * route to the first route in the node. + */ + if (!info->skip_notify_kernel && + (notify_sibling_rt || ins == &fn->leaf)) { + enum fib_event_type fib_event; + + if (notify_sibling_rt) + fib_event = FIB_EVENT_ENTRY_APPEND; + else + fib_event = FIB_EVENT_ENTRY_REPLACE; err = call_fib6_entry_notifiers(info->nl_net, - FIB_EVENT_ENTRY_ADD, - rt, extack); + fib_event, rt, + extack); if (err) { struct fib6_info *sibling, *next_sibling; @@ -1212,7 +1259,7 @@ add: return -ENOENT; } - if (!info->skip_notify_kernel) { + if (!info->skip_notify_kernel && ins == &fn->leaf) { err = call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE, rt, extack); @@ -1845,13 +1892,29 @@ static struct fib6_node *fib6_repair_tree(struct net *net, static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, struct fib6_info __rcu **rtp, struct nl_info *info) { + struct fib6_info *leaf, *replace_rt = NULL; struct fib6_walker *w; struct fib6_info *rt = rcu_dereference_protected(*rtp, lockdep_is_held(&table->tb6_lock)); struct net *net = info->nl_net; + bool notify_del = false; RT6_TRACE("fib6_del_route\n"); + /* If the deleted route is the first in the node and it is not part of + * a multipath route, then we need to replace it with the next route + * in the node, if exists. + */ + leaf = rcu_dereference_protected(fn->leaf, + lockdep_is_held(&table->tb6_lock)); + if (leaf == rt && !rt->fib6_nsiblings) { + if (rcu_access_pointer(rt->fib6_next)) + replace_rt = rcu_dereference_protected(rt->fib6_next, + lockdep_is_held(&table->tb6_lock)); + else + notify_del = true; + } + /* Unlink it */ *rtp = rt->fib6_next; rt->fib6_node = NULL; @@ -1869,6 +1932,14 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, if (rt->fib6_nsiblings) { struct fib6_info *sibling, *next_sibling; + /* The route is deleted from a multipath route. If this + * multipath route is the first route in the node, then we need + * to emit a delete notification. Otherwise, we need to skip + * the notification. + */ + if (rt->fib6_metric == leaf->fib6_metric && + rt6_qualify_for_ecmp(leaf)) + notify_del = true; list_for_each_entry_safe(sibling, next_sibling, &rt->fib6_siblings, fib6_siblings) sibling->fib6_nsiblings--; @@ -1904,8 +1975,13 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, fib6_purge_rt(rt, fn, net); - if (!info->skip_notify_kernel) - call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL); + if (!info->skip_notify_kernel) { + if (notify_del) + call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, + rt, NULL); + else if (replace_rt) + call_fib6_entry_notifiers_replace(net, replace_rt); + } if (!info->skip_notify) inet6_rt_notify(RTM_DELROUTE, rt, info, 0); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index affb51c11a25..4fbdc60b4e07 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3757,6 +3757,7 @@ static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg) if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) { struct fib6_info *sibling, *next_sibling; + struct fib6_node *fn; /* prefer to send a single notification with all hops */ skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); @@ -3772,12 +3773,32 @@ static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg) info->skip_notify = 1; } + /* 'rt' points to the first sibling route. If it is not the + * leaf, then we do not need to send a notification. Otherwise, + * we need to check if the last sibling has a next route or not + * and emit a replace or delete notification, respectively. + */ info->skip_notify_kernel = 1; - call_fib6_multipath_entry_notifiers(net, - FIB_EVENT_ENTRY_DEL, - rt, - rt->fib6_nsiblings, - NULL); + fn = rcu_dereference_protected(rt->fib6_node, + lockdep_is_held(&table->tb6_lock)); + if (rcu_access_pointer(fn->leaf) == rt) { + struct fib6_info *last_sibling, *replace_rt; + + last_sibling = list_last_entry(&rt->fib6_siblings, + struct fib6_info, + fib6_siblings); + replace_rt = rcu_dereference_protected( + last_sibling->fib6_next, + lockdep_is_held(&table->tb6_lock)); + if (replace_rt) + call_fib6_entry_notifiers_replace(net, + replace_rt); + else + call_fib6_multipath_entry_notifiers(net, + FIB_EVENT_ENTRY_DEL, + rt, rt->fib6_nsiblings, + NULL); + } list_for_each_entry_safe(sibling, next_sibling, &rt->fib6_siblings, fib6_siblings) { @@ -5025,12 +5046,37 @@ static void ip6_route_mpath_notify(struct fib6_info *rt, inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); } +static bool ip6_route_mpath_should_notify(const struct fib6_info *rt) +{ + bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); + bool should_notify = false; + struct fib6_info *leaf; + struct fib6_node *fn; + + rcu_read_lock(); + fn = rcu_dereference(rt->fib6_node); + if (!fn) + goto out; + + leaf = rcu_dereference(fn->leaf); + if (!leaf) + goto out; + + if (rt == leaf || + (rt_can_ecmp && rt->fib6_metric == leaf->fib6_metric && + rt6_qualify_for_ecmp(leaf))) + should_notify = true; +out: + rcu_read_unlock(); + + return should_notify; +} + static int ip6_route_multipath_add(struct fib6_config *cfg, struct netlink_ext_ack *extack) { struct fib6_info *rt_notif = NULL, *rt_last = NULL; struct nl_info *info = &cfg->fc_nlinfo; - enum fib_event_type event_type; struct fib6_config r_cfg; struct rtnexthop *rtnh; struct fib6_info *rt; @@ -5155,13 +5201,27 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, nhn++; } - event_type = replace ? FIB_EVENT_ENTRY_REPLACE : FIB_EVENT_ENTRY_ADD; - err = call_fib6_multipath_entry_notifiers(info->nl_net, event_type, - rt_notif, nhn - 1, extack); - if (err) { - /* Delete all the siblings that were just added */ - err_nh = NULL; - goto add_errout; + /* An in-kernel notification should only be sent in case the new + * multipath route is added as the first route in the node, or if + * it was appended to it. We pass 'rt_notif' since it is the first + * sibling and might allow us to skip some checks in the replace case. + */ + if (ip6_route_mpath_should_notify(rt_notif)) { + enum fib_event_type fib_event; + + if (rt_notif->fib6_nsiblings != nhn - 1) + fib_event = FIB_EVENT_ENTRY_APPEND; + else + fib_event = FIB_EVENT_ENTRY_REPLACE; + + err = call_fib6_multipath_entry_notifiers(info->nl_net, + fib_event, rt_notif, + nhn - 1, extack); + if (err) { + /* Delete all the siblings that were just added */ + err_nh = NULL; + goto add_errout; + } } /* success ... tell user about new route */ @@ -5516,6 +5576,13 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, expires -= jiffies; } + if (!dst) { + if (rt->offload) + rtm->rtm_flags |= RTM_F_OFFLOAD; + if (rt->trap) + rtm->rtm_flags |= RTM_F_TRAP; + } + if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0) goto nla_put_failure; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index df5fd9109696..33a578a3eb3a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -75,13 +75,14 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); static const struct inet_connection_sock_af_ops ipv6_mapped; -static const struct inet_connection_sock_af_ops ipv6_specific; +const struct inet_connection_sock_af_ops ipv6_specific; #ifdef CONFIG_TCP_MD5SIG static const struct tcp_sock_af_ops tcp_sock_ipv6_specific; static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; #else static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, - const struct in6_addr *addr) + const struct in6_addr *addr, + int l3index) { return NULL; } @@ -237,6 +238,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; icsk->icsk_af_ops = &ipv6_mapped; + if (sk_is_mptcp(sk)) + mptcp_handle_ipv6_mapped(sk, true); sk->sk_backlog_rcv = tcp_v4_do_rcv; #ifdef CONFIG_TCP_MD5SIG tp->af_specific = &tcp_sock_ipv6_mapped_specific; @@ -247,6 +250,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (err) { icsk->icsk_ext_hdr_len = exthdrlen; icsk->icsk_af_ops = &ipv6_specific; + if (sk_is_mptcp(sk)) + mptcp_handle_ipv6_mapped(sk, false); sk->sk_backlog_rcv = tcp_v6_do_rcv; #ifdef CONFIG_TCP_MD5SIG tp->af_specific = &tcp_sock_ipv6_specific; @@ -532,15 +537,22 @@ static void tcp_v6_reqsk_destructor(struct request_sock *req) #ifdef CONFIG_TCP_MD5SIG static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, - const struct in6_addr *addr) + const struct in6_addr *addr, + int l3index) { - return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6); + return tcp_md5_do_lookup(sk, l3index, + (union tcp_md5_addr *)addr, AF_INET6); } static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk, const struct sock *addr_sk) { - return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr); + int l3index; + + l3index = l3mdev_master_ifindex_by_index(sock_net(sk), + addr_sk->sk_bound_dev_if); + return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr, + l3index); } static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, @@ -548,6 +560,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, { struct tcp_md5sig cmd; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; + int l3index = 0; u8 prefixlen; if (optlen < sizeof(cmd)) @@ -569,12 +582,30 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128; } + if (optname == TCP_MD5SIG_EXT && + cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) { + struct net_device *dev; + + rcu_read_lock(); + dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex); + if (dev && netif_is_l3_master(dev)) + l3index = dev->ifindex; + rcu_read_unlock(); + + /* ok to reference set/not set outside of rcu; + * right now device MUST be an L3 master + */ + if (!dev || !l3index) + return -EINVAL; + } + if (!cmd.tcpm_keylen) { if (ipv6_addr_v4mapped(&sin6->sin6_addr)) return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], - AF_INET, prefixlen); + AF_INET, prefixlen, + l3index); return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr, - AF_INET6, prefixlen); + AF_INET6, prefixlen, l3index); } if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) @@ -582,12 +613,13 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, if (ipv6_addr_v4mapped(&sin6->sin6_addr)) return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], - AF_INET, prefixlen, cmd.tcpm_key, - cmd.tcpm_keylen, GFP_KERNEL); + AF_INET, prefixlen, l3index, + cmd.tcpm_key, cmd.tcpm_keylen, + GFP_KERNEL); return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr, - AF_INET6, prefixlen, cmd.tcpm_key, - cmd.tcpm_keylen, GFP_KERNEL); + AF_INET6, prefixlen, l3index, + cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); } static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp, @@ -698,17 +730,23 @@ clear_hash_noput: #endif static bool tcp_v6_inbound_md5_hash(const struct sock *sk, - const struct sk_buff *skb) + const struct sk_buff *skb, + int dif, int sdif) { #ifdef CONFIG_TCP_MD5SIG const __u8 *hash_location = NULL; struct tcp_md5sig_key *hash_expected; const struct ipv6hdr *ip6h = ipv6_hdr(skb); const struct tcphdr *th = tcp_hdr(skb); - int genhash; + int genhash, l3index; u8 newhash[16]; - hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr); + /* sdif set, means packet ingressed via a device + * in an L3 domain and dif is set to the l3mdev + */ + l3index = sdif ? dif : 0; + + hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index); hash_location = tcp_parse_md5sig_option(th); /* We've parsed the options - do we have a hash? */ @@ -732,10 +770,10 @@ static bool tcp_v6_inbound_md5_hash(const struct sock *sk, if (genhash || memcmp(hash_location, newhash, 16) != 0) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); - net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n", + net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n", genhash ? "failed" : "mismatch", &ip6h->saddr, ntohs(th->source), - &ip6h->daddr, ntohs(th->dest)); + &ip6h->daddr, ntohs(th->dest), l3index); return true; } #endif @@ -785,7 +823,7 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .syn_ack_timeout = tcp_syn_ack_timeout, }; -static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { +const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr), #ifdef CONFIG_TCP_MD5SIG @@ -951,8 +989,18 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) rcu_read_lock(); hash_location = tcp_parse_md5sig_option(th); if (sk && sk_fullsock(sk)) { - key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr); + int l3index; + + /* sdif set, means packet ingressed via a device + * in an L3 domain and inet_iif is set to it. + */ + l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; + key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); } else if (hash_location) { + int dif = tcp_v6_iif_l3_slave(skb); + int sdif = tcp_v6_sdif(skb); + int l3index; + /* * active side is lost. Try to find listening socket through * source port, and then find md5 key through listening socket. @@ -964,13 +1012,16 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) &tcp_hashinfo, NULL, 0, &ipv6h->saddr, th->source, &ipv6h->daddr, - ntohs(th->source), - tcp_v6_iif_l3_slave(skb), - tcp_v6_sdif(skb)); + ntohs(th->source), dif, sdif); if (!sk1) goto out; - key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr); + /* sdif set, means packet ingressed via a device + * in an L3 domain and dif is set to it. + */ + l3index = tcp_v6_sdif(skb) ? dif : 0; + + key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); if (!key) goto out; @@ -1040,6 +1091,10 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req) { + int l3index; + + l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; + /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. */ @@ -1054,7 +1109,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, req->ts_recent, sk->sk_bound_dev_if, - tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr), + tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index), 0, 0, sk->sk_priority); } @@ -1126,6 +1181,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * struct sock *newsk; #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *key; + int l3index; #endif struct flowi6 fl6; @@ -1151,6 +1207,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newnp->saddr = newsk->sk_v6_rcv_saddr; inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; + if (sk_is_mptcp(newsk)) + mptcp_handle_ipv6_mapped(newsk, true); newsk->sk_backlog_rcv = tcp_v4_do_rcv; #ifdef CONFIG_TCP_MD5SIG newtp->af_specific = &tcp_sock_ipv6_mapped_specific; @@ -1269,8 +1327,10 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newinet->inet_rcv_saddr = LOOPBACK4_IPV6; #ifdef CONFIG_TCP_MD5SIG + l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); + /* Copy over the MD5 key from the original socket */ - key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr); + key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index); if (key) { /* We're using one, so create a matching key * on the newsk structure. If we fail to get @@ -1278,7 +1338,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * * across. Shucks. */ tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr, - AF_INET6, 128, key->key, key->keylen, + AF_INET6, 128, l3index, key->key, key->keylen, sk_gfp_mask(sk, GFP_ATOMIC)); } #endif @@ -1480,6 +1540,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) { struct sk_buff *skb_to_free; int sdif = inet6_sdif(skb); + int dif = inet6_iif(skb); const struct tcphdr *th; const struct ipv6hdr *hdr; bool refcounted; @@ -1528,7 +1589,7 @@ process: struct sock *nsk; sk = req->rsk_listener; - if (tcp_v6_inbound_md5_hash(sk, skb)) { + if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) { sk_drops_add(sk, skb); reqsk_put(req); goto discard_it; @@ -1583,7 +1644,7 @@ process: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; - if (tcp_v6_inbound_md5_hash(sk, skb)) + if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) goto discard_and_relse; if (tcp_filter(sk, skb)) @@ -1739,7 +1800,7 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_destructor = tcp_twsk_destructor, }; -static const struct inet_connection_sock_af_ops ipv6_specific = { +const struct inet_connection_sock_af_ops ipv6_specific = { .queue_xmit = inet6_csk_xmit, .send_check = tcp_v6_send_check, .rebuild_header = inet6_sk_rebuild_header, @@ -2108,9 +2169,16 @@ int __init tcpv6_init(void) ret = register_pernet_subsys(&tcpv6_net_ops); if (ret) goto out_tcpv6_protosw; + + ret = mptcpv6_init(); + if (ret) + goto out_tcpv6_pernet_subsys; + out: return ret; +out_tcpv6_pernet_subsys: + unregister_pernet_subsys(&tcpv6_net_ops); out_tcpv6_protosw: inet6_unregister_protosw(&tcpv6_protosw); out_tcpv6_protocol: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9fec580c968e..5dc439a391fe 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -690,8 +690,7 @@ static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) __skb_push(skb, -skb_mac_offset(skb)); segs = udp_rcv_segment(sk, skb, false); - for (skb = segs; skb; skb = next) { - next = skb->next; + skb_list_walk_safe(segs, skb, next) { __skb_pull(skb, skb_transport_offset(skb)); ret = udpv6_queue_rcv_one_skb(sk, skb); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 64b8f05d6735..584157a07759 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -115,8 +115,10 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb) { struct udphdr *uh = udp_gro_udphdr(skb); + struct sk_buff *pp; + struct sock *sk; - if (unlikely(!uh) || !static_branch_unlikely(&udpv6_encap_needed_key)) + if (unlikely(!uh)) goto flush; /* Don't bother verifying checksum if we're going to flush anyway. */ @@ -127,12 +129,16 @@ struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb) ip6_gro_compute_pseudo)) goto flush; else if (uh->check) - skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check, + skb_gro_checksum_try_convert(skb, IPPROTO_UDP, ip6_gro_compute_pseudo); skip: NAPI_GRO_CB(skb)->is_ipv6 = 1; - return udp_gro_receive(head, skb, uh, udp6_lib_lookup_skb); + rcu_read_lock(); + sk = static_branch_unlikely(&udpv6_encap_needed_key) ? udp6_lib_lookup_skb(skb, uh->source, uh->dest) : NULL; + pp = udp_gro_receive(head, skb, uh, sk); + rcu_read_unlock(); + return pp; flush: NAPI_GRO_CB(skb)->flush = 1; @@ -144,6 +150,23 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff) const struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); + if (NAPI_GRO_CB(skb)->is_flist) { + uh->len = htons(skb->len - nhoff); + + skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4); + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + + if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + if (skb->csum_level < SKB_MAX_CSUM_LEVEL) + skb->csum_level++; + } else { + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->csum_level = 0; + } + + return 0; + } + if (uh->check) uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr, &ipv6h->daddr, 0); |