summaryrefslogtreecommitdiffstats
path: root/net/ipv4/ip_output.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/ip_output.c')
-rw-r--r--net/ipv4/ip_output.c59
1 files changed, 42 insertions, 17 deletions
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 05d105832bdb..9af2b7853be4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -74,6 +74,7 @@
#include <net/checksum.h>
#include <net/inetpeer.h>
#include <net/lwtunnel.h>
+#include <linux/bpf-cgroup.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_bridge.h>
@@ -239,19 +240,23 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
struct sk_buff *segs;
int ret = 0;
- /* common case: fragmentation of segments is not allowed,
- * or seglen is <= mtu
+ /* common case: seglen is <= mtu
*/
- if (((IPCB(skb)->flags & IPSKB_FRAG_SEGS) == 0) ||
- skb_gso_validate_mtu(skb, mtu))
+ if (skb_gso_validate_mtu(skb, mtu))
return ip_finish_output2(net, sk, skb);
- /* Slowpath - GSO segment length is exceeding the dst MTU.
+ /* Slowpath - GSO segment length exceeds the egress MTU.
*
- * This can happen in two cases:
- * 1) TCP GRO packet, DF bit not set
- * 2) skb arrived via virtio-net, we thus get TSO/GSO skbs directly
- * from host network stack.
+ * This can happen in several cases:
+ * - Forwarding of a TCP GRO skb, when DF flag is not set.
+ * - Forwarding of an skb that arrived on a virtualization interface
+ * (virtio-net/vhost/tap) with TSO/GSO size set by other network
+ * stack.
+ * - Local GSO skb transmitted on an NETIF_F_TSO tunnel stacked over an
+ * interface with a smaller MTU.
+ * - Arriving GRO skb (or GSO skb in a virtualized environment) that is
+ * bridged to a NETIF_F_TSO tunnel stacked over an interface with an
+ * insufficent MTU.
*/
features = netif_skb_features(skb);
BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET);
@@ -281,6 +286,13 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
unsigned int mtu;
+ int ret;
+
+ ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
+ if (ret) {
+ kfree_skb(skb);
+ return ret;
+ }
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
/* Policy lookup after SNAT yielded a new policy */
@@ -299,6 +311,20 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk
return ip_finish_output2(net, sk, skb);
}
+static int ip_mc_finish_output(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ int ret;
+
+ ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
+ if (ret) {
+ kfree_skb(skb);
+ return ret;
+ }
+
+ return dev_loopback_xmit(net, sk, skb);
+}
+
int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct rtable *rt = skb_rtable(skb);
@@ -336,7 +362,7 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (newskb)
NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
net, sk, newskb, NULL, newskb->dev,
- dev_loopback_xmit);
+ ip_mc_finish_output);
}
/* Multicasts with ttl 0 must not go beyond the host */
@@ -352,7 +378,7 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (newskb)
NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
net, sk, newskb, NULL, newskb->dev,
- dev_loopback_xmit);
+ ip_mc_finish_output);
}
return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
@@ -538,7 +564,6 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
{
struct iphdr *iph;
int ptr;
- struct net_device *dev;
struct sk_buff *skb2;
unsigned int mtu, hlen, left, len, ll_rs;
int offset;
@@ -546,8 +571,6 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
struct rtable *rt = skb_rtable(skb);
int err = 0;
- dev = rt->dst.dev;
-
/* for offloaded checksums cleanup checksum before fragmentation */
if (skb->ip_summed == CHECKSUM_PARTIAL &&
(err = skb_checksum_help(skb)))
@@ -580,7 +603,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
*/
if (skb_has_frag_list(skb)) {
struct sk_buff *frag, *frag2;
- int first_len = skb_pagelen(skb);
+ unsigned int first_len = skb_pagelen(skb);
if (first_len - hlen > mtu ||
((first_len - hlen) & 7) ||
@@ -1582,7 +1605,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
}
oif = arg->bound_dev_if;
- oif = oif ? : skb->skb_iif;
+ if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
+ oif = skb->skb_iif;
flowi4_init_output(&fl4, oif,
IP4_REPLY_MARK(net, skb->mark),
@@ -1590,7 +1614,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
ip_reply_arg_flowi_flags(arg),
daddr, saddr,
- tcp_hdr(skb)->source, tcp_hdr(skb)->dest);
+ tcp_hdr(skb)->source, tcp_hdr(skb)->dest,
+ arg->uid);
security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt))