diff options
Diffstat (limited to 'net/ipv6/ip6_output.c')
-rw-r--r-- | net/ipv6/ip6_output.c | 171 |
1 files changed, 53 insertions, 118 deletions
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 54fc4c711f2c..a722a43dd668 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -117,6 +117,8 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * return res; } + IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); + rcu_read_lock(); nexthop = rt6_nexthop((struct rt6_info *)dst, daddr); neigh = __ipv6_neigh_lookup_noref(dev, nexthop); @@ -162,7 +164,13 @@ ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk, int err; skb_mark_not_on_list(segs); - err = ip6_fragment(net, sk, segs, ip6_finish_output2); + /* Last GSO segment can be smaller than gso_size (and MTU). + * Adding a fragment header would produce an "atomic fragment", + * which is considered harmful (RFC-8021). Avoid that. + */ + err = segs->len > mtu ? + ip6_fragment(net, sk, segs, ip6_finish_output2) : + ip6_finish_output2(net, sk, segs); if (err && ret == 0) ret = err; } @@ -170,6 +178,16 @@ ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk, return ret; } +static int ip6_finish_output_gso(struct net *net, struct sock *sk, + struct sk_buff *skb, unsigned int mtu) +{ + if (!(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) && + !skb_gso_validate_network_len(skb, mtu)) + return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu); + + return ip6_finish_output2(net, sk, skb); +} + static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { unsigned int mtu; @@ -183,17 +201,14 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff #endif mtu = ip6_skb_dst_mtu(skb); - if (skb_is_gso(skb) && - !(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) && - !skb_gso_validate_network_len(skb, mtu)) - return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu); + if (skb_is_gso(skb)) + return ip6_finish_output_gso(net, sk, skb, mtu); - if ((skb->len > mtu && !skb_is_gso(skb)) || - dst_allfrag(skb_dst(skb)) || + if (skb->len > mtu || (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) return ip6_fragment(net, sk, skb, ip6_finish_output2); - else - return ip6_finish_output2(net, sk, skb); + + return ip6_finish_output2(net, sk, skb); } static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -232,12 +247,11 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(ip6_output); -bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) +bool ip6_autoflowlabel(struct net *net, const struct sock *sk) { - if (!np->autoflowlabel_set) + if (!inet6_test_bit(AUTOFLOWLABEL_SET, sk)) return ip6_default_np_autolabel(net); - else - return np->autoflowlabel; + return inet6_test_bit(AUTOFLOWLABEL, sk); } /* @@ -309,12 +323,12 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, * Fill in the IPv6 header */ if (np) - hlimit = np->hop_limit; + hlimit = READ_ONCE(np->hop_limit); if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, - ip6_autoflowlabel(net, np), fl6)); + ip6_autoflowlabel(net, sk), fl6)); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -329,7 +343,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, mtu = dst_mtu(dst); if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { - IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); /* if egress device is enslaved to an L3 master device pass the * skb to its handler for processing @@ -369,9 +383,8 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel) if (sk && ra->sel == sel && (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == skb->dev->ifindex)) { - struct ipv6_pinfo *np = inet6_sk(sk); - if (np && np->rtalert_isolate && + if (inet6_test_bit(RTALERT_ISOLATE, sk) && !net_eq(sock_net(sk), dev_net(skb->dev))) { continue; } @@ -448,10 +461,6 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) static inline int ip6_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); - - __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); - #ifdef CONFIG_NET_SWITCHDEV if (skb->offload_l3_fwd_mark) { consume_skb(skb); @@ -619,6 +628,8 @@ int ip6_forward(struct sk_buff *skb) } } + __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); + mtu = ip6_dst_mtu_maybe_forward(dst, true); if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; @@ -881,9 +892,11 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, mtu = IPV6_MIN_MTU; } - if (np && np->frag_size < mtu) { - if (np->frag_size) - mtu = np->frag_size; + if (np) { + u32 frag_size = READ_ONCE(np->frag_size); + + if (frag_size && frag_size < mtu) + mtu = frag_size; } if (mtu < hlen + sizeof(struct frag_hdr) + 8) goto fail_toobig; @@ -1017,9 +1030,6 @@ slow_path: return err; fail_toobig: - if (skb->sk && dst_allfrag(skb_dst(skb))) - sk_gso_disable(skb->sk); - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); err = -EMSGSIZE; @@ -1113,7 +1123,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, rcu_read_lock(); from = rt ? rcu_dereference(rt->from) : NULL; err = ip6_route_get_saddr(net, from, &fl6->daddr, - sk ? inet6_sk(sk)->srcprefs : 0, + sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0, &fl6->saddr); rcu_read_unlock(); @@ -1283,74 +1293,6 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); -/** - * ip6_dst_lookup_tunnel - perform route lookup on tunnel - * @skb: Packet for which lookup is done - * @dev: Tunnel device - * @net: Network namespace of tunnel device - * @sock: Socket which provides route info - * @saddr: Memory to store the src ip address - * @info: Tunnel information - * @protocol: IP protocol - * @use_cache: Flag to enable cache usage - * This function performs a route lookup on a tunnel - * - * It returns a valid dst pointer and stores src address to be used in - * tunnel in param saddr on success, else a pointer encoded error code. - */ - -struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb, - struct net_device *dev, - struct net *net, - struct socket *sock, - struct in6_addr *saddr, - const struct ip_tunnel_info *info, - u8 protocol, - bool use_cache) -{ - struct dst_entry *dst = NULL; -#ifdef CONFIG_DST_CACHE - struct dst_cache *dst_cache; -#endif - struct flowi6 fl6; - __u8 prio; - -#ifdef CONFIG_DST_CACHE - dst_cache = (struct dst_cache *)&info->dst_cache; - if (use_cache) { - dst = dst_cache_get_ip6(dst_cache, saddr); - if (dst) - return dst; - } -#endif - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_mark = skb->mark; - fl6.flowi6_proto = protocol; - fl6.daddr = info->key.u.ipv6.dst; - fl6.saddr = info->key.u.ipv6.src; - prio = info->key.tos; - fl6.flowlabel = ip6_make_flowinfo(prio, info->key.label); - - dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6, - NULL); - if (IS_ERR(dst)) { - netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr); - return ERR_PTR(-ENETUNREACH); - } - if (dst->dev == dev) { /* is this necessary? */ - netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr); - dst_release(dst); - return ERR_PTR(-ELOOP); - } -#ifdef CONFIG_DST_CACHE - if (use_cache) - dst_cache_set_ip6(dst_cache, dst, &fl6.saddr); -#endif - *saddr = fl6.saddr; - return dst; -} -EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel); - static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, gfp_t gfp) { @@ -1392,7 +1334,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, struct rt6_info *rt) { struct ipv6_pinfo *np = inet6_sk(sk); - unsigned int mtu; + unsigned int mtu, frag_size; struct ipv6_txoptions *nopt, *opt = ipc6->opt; /* callers pass dst together with a reference, set it first so @@ -1436,25 +1378,23 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, v6_cork->hop_limit = ipc6->hlimit; v6_cork->tclass = ipc6->tclass; if (rt->dst.flags & DST_XFRM_TUNNEL) - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ? READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); else - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ? READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst)); - if (np->frag_size < mtu) { - if (np->frag_size) - mtu = np->frag_size; - } + + frag_size = READ_ONCE(np->frag_size); + if (frag_size && frag_size < mtu) + mtu = frag_size; + cork->base.fragsize = mtu; cork->base.gso_size = ipc6->gso_size; cork->base.tx_flags = 0; cork->base.mark = ipc6->sockc.mark; sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags); - if (dst_allfrag(xfrm_dst_path(&rt->dst))) - cork->base.flags |= IPCORK_ALLFRAG; cork->base.length = 0; - cork->base.transmit_time = ipc6->sockc.transmit_time; return 0; @@ -1511,8 +1451,6 @@ static int __ip6_append_data(struct sock *sk, headersize = sizeof(struct ipv6hdr) + (opt ? opt->opt_flen + opt->opt_nflen : 0) + - (dst_allfrag(&rt->dst) ? - sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; if (mtu <= fragheaderlen || @@ -1622,7 +1560,7 @@ emsgsize: while (length > 0) { /* Check if the remaining data fits into current packet. */ - copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; + copy = (cork->length <= mtu ? mtu : maxfraglen) - skb->len; if (copy < length) copy = maxfraglen - skb->len; @@ -1653,7 +1591,7 @@ alloc_new_skb: */ datalen = length + fraggap; - if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) + if (datalen > (cork->length <= mtu ? mtu : maxfraglen) - fragheaderlen) datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; fraglen = datalen + fragheaderlen; pagedlen = 0; @@ -1902,7 +1840,6 @@ static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork) struct dst_entry *dst = cork->base.dst; cork->base.dst = NULL; - cork->base.flags &= ~IPCORK_ALLFRAG; skb_dst_set(skb, dst); } @@ -1923,7 +1860,6 @@ static void ip6_cork_release(struct inet_cork_full *cork, if (cork->base.dst) { dst_release(cork->base.dst); cork->base.dst = NULL; - cork->base.flags &= ~IPCORK_ALLFRAG; } } @@ -1935,7 +1871,6 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; struct in6_addr *final_dst; - struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); struct ipv6hdr *hdr; struct ipv6_txoptions *opt = v6_cork->opt; @@ -1978,18 +1913,18 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, ip6_flow_hdr(hdr, v6_cork->tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, - ip6_autoflowlabel(net, np), fl6)); + ip6_autoflowlabel(net, sk), fl6)); hdr->hop_limit = v6_cork->hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; hdr->daddr = *final_dst; - skb->priority = sk->sk_priority; + skb->priority = READ_ONCE(sk->sk_priority); skb->mark = cork->base.mark; skb->tstamp = cork->base.transmit_time; ip6_cork_steal_dst(skb, cork); - IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); + IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); u8 icmp6_type; @@ -2091,7 +2026,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, return ERR_PTR(err); } if (ipc6->dontfrag < 0) - ipc6->dontfrag = inet6_sk(sk)->dontfrag; + ipc6->dontfrag = inet6_test_bit(DONTFRAG, sk); err = __ip6_append_data(sk, &queue, cork, &v6_cork, ¤t->task_frag, getfrag, from, |